Merging Smart-seq2 and 10X Datasets
We have quality-controlled the 10X data and the SS2 data and now are left with the following objects:
10X 5K data - pb_sex_filtered
10X 30K data - pb_30k_sex_filtered
SS2 mutant data - ss2_mutants_final
[1] "patchwork is loaded correctly"
[1] "viridis is loaded correctly"
[1] "Seurat is loaded correctly"
[1] "cowplot is loaded correctly"
[1] "gridExtra is loaded correctly"
[1] "grid is loaded correctly"
[1] "Hmisc is loaded correctly"
[1] "reshape2 is loaded correctly"
[1] "dplyr is loaded correctly"
[1] "Nebulosa is loaded correctly"
[1] "monocle3 is loaded correctly"
screen hits
## EDIT - change this to the excel table once we have it finalized for the screen
screen_hits <- c("PBANKA-0516300",
"PBANKA-1217700",
"PBANKA-0409100",
"PBANKA-1034300",
"PBANKA-1437500",
"PBANKA-0827500",
"PBANKA-0824300",
"PBANKA-1426900",
"PBANKA-0105300",
"PBANKA-0921100",
"PBANKA-1002400",
"PBANKA-0829400",
"PBANKA-1347200",
"PBANKA-0828000",
"PBANKA-0902300",
"PBANKA-1418100",
"PBANKA-1435200",
"PBANKA-1454800",
"PBANKA-0712300",
"PBANKA-0410500",
"PBANKA-1144800",
"PBANKA-1231600",
"PBANKA-0503200",
"PBANKA-0308900",
"PBANKA-1214700",
"PBANKA-0709900",
"PBANKA-0311900",
"PBANKA-0716500",
"PBANKA-1447900",
"PBANKA-0102200",
"PBANKA-0713500",
"PBANKA-0102400",
"PBANKA-1302700",
"PBANKA-1235900",
"PBANKA-0401100",
"PBANKA-0413400",
"PBANKA-1126900",
"PBANKA-1425900",
"PBANKA-0418300",
"PBANKA-1464600",
"PBANKA-0806000")
load in datasets
## load the 10X dataset
pb_sex_filtered <- readRDS("../data_to_export/pb_sex_filtered.RDS")
## load the SS2 dataset
ss2_mutants_final <- readRDS("../data_to_export/ss2_mutants_final.RDS")
## inspect
paste("10x dataset")
[1] "10x dataset"
pb_sex_filtered
An object of class Seurat
5098 features across 6191 samples within 1 assay
Active assay: RNA (5098 features, 2000 variable features)
2 dimensional reductions calculated: pca, umap
paste("Smart-seq2 dataset")
[1] "Smart-seq2 dataset"
ss2_mutants_final
An object of class Seurat
5245 features across 2717 samples within 1 assay
Active assay: RNA (5245 features, 2000 variable features)
2 dimensional reductions calculated: pca, umap
paste("The composition of the Smart-seq2 dataset is:")
[1] "The composition of the Smart-seq2 dataset is:"
table(ss2_mutants_final@meta.data$genotype)
Mutant WT
2028 689
## extract 10x data
tenx_5k_counts <- as.matrix(pb_sex_filtered@assays$RNA@counts)
tenx_5k_pheno <- pb_sex_filtered@meta.data
## Create fresh object
tenx_5k_counts_to_integrate <- CreateSeuratObject(counts = tenx_5k_counts, meta.data = tenx_5k_pheno, min.cells = 0, min.features = 0, project = "GCSKO")
Invalid name supplied, making object name syntactically valid. New object name is orig.identnCount_RNAnFeature_RNAexperimentRNA_snn_res.1seurat_clusterspANN_0.25_0.01_440DF.classifications_0.25_0.01_440Prediction.Spearman.r.Spearman.Prediction.Pearsons.r.Pearsons.Prediction.Spearman._Kasiar.Spearman._KasiaPrediction.Pearson._Kasiar.Pearson._KasiaRNA_snn_res.2RNA_snn_res.3; see ?make.names for more details on syntax validity
## add experiment meta data
tenx_5k_counts_to_integrate@meta.data$experiment <- "tenx_5k"
## inspect
tenx_5k_counts_to_integrate
An object of class Seurat
5098 features across 6191 samples within 1 assay
Active assay: RNA (5098 features, 0 variable features)
We need to make sure the mutant data is compatible with the 10X data. the 10X data has fewer genes represented so we need to find the intersect of the two before integration.
## extract SS2 data
mutant_counts_for_integration <- as.matrix(ss2_mutants_final@assays$RNA@counts)
mutant_pheno_for_integration <- ss2_mutants_final@meta.data
## change counts so the :rRNA and :tRNA are not there:
rownames(mutant_counts_for_integration) <- gsub(":ncRNA", "", gsub(":rRNA", "", gsub(":tRNA", "", rownames(mutant_counts_for_integration))))
## change the gene names so that they are - rather than _:
rownames(mutant_counts_for_integration) <- gsub("_", "-", rownames(mutant_counts_for_integration))
## calculate how many of the genes overlap - 10x does start out with 5098 vs 5245
genes_in_tenx_dataset <- intersect(rownames(tenx_5k_counts), rownames(mutant_counts_for_integration))
## print number of genes that overlap
dim(mutant_counts_for_integration)
[1] 5245 2717
## subset the mutant counts to contain only 10x genes
mutant_counts_for_integration <- mutant_counts_for_integration[which(rownames(mutant_counts_for_integration) %in% genes_in_tenx_dataset), ]
## print result of genes that overlap
dim(mutant_counts_for_integration)
[1] 5018 2717
## make Seurat object:
GCSKO_mutants <- CreateSeuratObject(counts = mutant_counts_for_integration, meta.data = mutant_pheno_for_integration, min.cells = 0, min.features = 0, project = "GCSKO")
## add experiment meta data
GCSKO_mutants@meta.data$experiment <- "mutants"
## inspect
GCSKO_mutants
An object of class Seurat
5018 features across 2717 samples within 1 assay
Active assay: RNA (5018 features, 0 variable features)
## double check that this is the same number of genes
## subset counts so that only genes represented in the other two objects are there:
length(intersect(rownames(tenx_5k_counts), rownames(mutant_counts_for_integration)))
[1] 5018
create list and normalise:
## make list
tenx.mutant.list <- list(tenx_5k_counts_to_integrate, GCSKO_mutants)
## prepare data
for (i in 1:length(tenx.mutant.list)) {
tenx.mutant.list[[i]] <- NormalizeData(tenx.mutant.list[[i]], verbose = FALSE)
tenx.mutant.list[[i]] <- FindVariableFeatures(tenx.mutant.list[[i]], selection.method = "vst",
nfeatures = 2000, verbose = FALSE)
}
## Find anchors
tenx.mutant.anchors <- FindIntegrationAnchors(object.list = tenx.mutant.list, dims = 1:21, verbose = FALSE)
UNRELIABLE VALUE: Future (‘future_lapply-1’) unexpectedly generated random numbers without specifying argument '[future.]seed'. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify argument '[future.]seed', e.g. 'seed=TRUE'. This ensures that proper, parallel-safe random numbers are produced via the L'Ecuyer-CMRG method. To disable this check, use [future].seed=NULL, or set option 'future.rng.onMisuse' to "ignore".
## Integrate data
tenx.mutant.integrated <- IntegrateData(anchorset = tenx.mutant.anchors, dims = 1:21, verbose = FALSE, features.to.integrate = genes_in_tenx_dataset)
Adding a command log without an assay associated with it
## Make the default assay integrated
DefaultAssay(tenx.mutant.integrated) <- "integrated"
## Run the standard workflow for visualization and clustering
tenx.mutant.integrated <- ScaleData(tenx.mutant.integrated, verbose = FALSE)
tenx.mutant.integrated <- RunPCA(tenx.mutant.integrated, npcs = 30, verbose = FALSE)
## inspect PCs
ElbowPlot(tenx.mutant.integrated, ndims = 30, reduction = "pca")
Run inital UMAP
## Run UMAP
tenx.mutant.integrated <- RunUMAP(tenx.mutant.integrated, reduction = "pca", dims = 1:8, n.neighbors = 50, seed.use = 1234, min.dist = 0.5, repulsion.strength = 0.05)
The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session11:12:13 UMAP embedding parameters a = 0.583 b = 1.334
11:12:14 Read 8908 rows and found 8 numeric columns
11:12:14 Using Annoy for neighbor search, n_neighbors = 50
11:12:14 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
11:12:17 Writing NN index file to temp file /var/folders/7t/kvh8b3952x9_4b3r74r1kstc000glh/T//Rtmp7pypgE/file63f264e8ad4
11:12:17 Searching Annoy index using 1 thread, search_k = 5000
11:12:22 Annoy recall = 100%
11:12:53 Commencing smooth kNN distance calibration using 1 thread
11:12:56 Initializing from normalized Laplacian + noise
11:12:57 Commencing optimization for 500 epochs, with 583548 positive edges
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
11:13:15 Optimization finished
See distribution by: altogether, experiment, and mutant ID
## Plot
DimPlot(tenx.mutant.integrated, reduction = "umap", pt.size = 0.01)
DimPlot(tenx.mutant.integrated, reduction = "umap", split.by = "experiment", pt.size = 0.01)
DimPlot(tenx.mutant.integrated, reduction = "umap", group.by = "identity_updated", label = TRUE, repel = TRUE, pt.size = 0.01)
After optimisation, the following UMAP can be calculated:
## Run optimised UMAP
tenx.mutant.integrated <- RunUMAP(tenx.mutant.integrated, reduction = "pca", dims = 1:10, n.neighbors = 150, seed.use = 1234, min.dist = 0.4, repulsion.strength = 0.03, local.connectivity = 150)
11:14:30 UMAP embedding parameters a = 0.7669 b = 1.223
11:14:30 Read 8908 rows and found 10 numeric columns
11:14:30 Using Annoy for neighbor search, n_neighbors = 150
11:14:30 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
11:14:33 Writing NN index file to temp file /var/folders/7t/kvh8b3952x9_4b3r74r1kstc000glh/T//Rtmp7pypgE/file63f6e7c77f1
11:14:33 Searching Annoy index using 1 thread, search_k = 15000
11:14:46 Annoy recall = 100%
11:14:47 Commencing smooth kNN distance calibration using 1 thread
11:14:48 8908 smooth knn distance failures
11:14:52 Initializing from normalized Laplacian + noise
11:14:54 Commencing optimization for 500 epochs, with 1813006 positive edges
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
11:20:09 Optimization finished
## plot
dp1 <- DimPlot(tenx.mutant.integrated, label = TRUE, repel = FALSE, pt.size = 0.05, dims = c(2,1), group.by = "experiment") +
## fix the axis
coord_fixed() +
## reverse the scale
scale_y_reverse()
## view
dp1
Now store these reversed embeddings in a new slot
## extract the cell embeddings from the UMAP
mds <- as.data.frame(tenx.mutant.integrated@reductions$umap@cell.embeddings)
## change the coordinates of UMAP 1 so they are reversed
mds$UMAP_1 <- -mds$UMAP_1
## change names of the cols
colnames(mds) <- paste0("DIM_UMAP_", 1:2)
## make into a matrix so that it can be saved in Seurat
mds <- as.matrix(mds)
## store this optimsed UMAP in a custom dim slot
tenx.mutant.integrated[["DIM_UMAP"]] <- CreateDimReducObject(embeddings = mds, key = "DIM_UMAP_", assay = DefaultAssay(tenx.mutant.integrated))
Keys should be one or more alphanumeric characters followed by an underscore, setting key from DIM_UMAP_ to DIMUMAP_All keys should be one or more alphanumeric characters followed by an underscore '_', setting key to DIMUMAP_
## check
DimPlot(tenx.mutant.integrated, label = TRUE, repel = FALSE, pt.size = 0.05, dims = c(2,1), reduction = "DIM_UMAP") + coord_fixed()
## run a new UMAP with
tenx.mutant.integrated <- RunUMAP(tenx.mutant.integrated, reduction = "pca", dims = 1:10, n.components = 10)
13:57:24 UMAP embedding parameters a = 0.9922 b = 1.112
13:57:24 Read 8908 rows and found 10 numeric columns
13:57:24 Using Annoy for neighbor search, n_neighbors = 30
13:57:24 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
13:57:26 Writing NN index file to temp file /var/folders/7t/kvh8b3952x9_4b3r74r1kstc000glh/T//Rtmp7pypgE/file63f25261122
13:57:26 Searching Annoy index using 1 thread, search_k = 3000
13:57:30 Annoy recall = 100%
13:57:36 Commencing smooth kNN distance calibration using 1 thread
13:57:40 Initializing from normalized Laplacian + noise
13:57:41 Commencing optimization for 500 epochs, with 366720 positive edges
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
13:57:59 Optimization finished
## generate new clusters at low resolution
tenx.mutant.integrated <- FindNeighbors(tenx.mutant.integrated, dims = 1:10, reduction = "umap")
Computing nearest neighbor graph
Computing SNN
tenx.mutant.integrated <- FindClusters(tenx.mutant.integrated, resolution = 0.5, random.seed = 42, algorithm = 2)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 8908
Number of edges: 214651
Running Louvain algorithm with multilevel refinement...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9475
Number of communities: 26
Elapsed time: 0 seconds
[1] 26
plot
## this function writes the next bit of code for you
## put it into the console and paste the response
#ploty <- c()
#for(i in seq_along(levels(tenx.mutant.integrated@meta.data$seurat_clusters))){
# ploty <- paste0(ploty, "list_UMAPs_by_cluster[[", i, "]]", " + ")
#}
## plot
list_UMAPs_by_cluster[[1]] + list_UMAPs_by_cluster[[2]] + list_UMAPs_by_cluster[[3]] + list_UMAPs_by_cluster[[4]] + list_UMAPs_by_cluster[[5]] + list_UMAPs_by_cluster[[6]] + list_UMAPs_by_cluster[[7]] + list_UMAPs_by_cluster[[8]] + list_UMAPs_by_cluster[[9]] + list_UMAPs_by_cluster[[10]] + list_UMAPs_by_cluster[[11]] + list_UMAPs_by_cluster[[12]] + list_UMAPs_by_cluster[[13]] + list_UMAPs_by_cluster[[14]] + list_UMAPs_by_cluster[[15]] + list_UMAPs_by_cluster[[16]] + list_UMAPs_by_cluster[[17]] + list_UMAPs_by_cluster[[18]] + list_UMAPs_by_cluster[[19]] + list_UMAPs_by_cluster[[20]] + list_UMAPs_by_cluster[[21]] + list_UMAPs_by_cluster[[22]] + list_UMAPs_by_cluster[[23]] + list_UMAPs_by_cluster[[24]] + list_UMAPs_by_cluster[[25]] + list_UMAPs_by_cluster[[26]]
We will look in more detail at cells as they enter the sexual trajecotry later. The PCA clustering will be more appropriate in this high-resolution view. In order to subset these cells, we will use the UMAP clustering.
## generate final clusters which will be written into the 'seurat.clusters' slot in meta data
tenx.mutant.integrated <- FindNeighbors(tenx.mutant.integrated, dims = 1:10, reduction = "umap")
Computing nearest neighbor graph
Computing SNN
tenx.mutant.integrated <- FindClusters(tenx.mutant.integrated, resolution = 0.5, random.seed = 42, algorithm = 2)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 8908
Number of edges: 214651
Running Louvain algorithm with multilevel refinement...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9475
Number of communities: 26
Elapsed time: 0 seconds
## Plot
DimPlot(tenx.mutant.integrated, label = TRUE, repel = TRUE, pt.size = 0.05, group.by = "seurat_clusters", dims = c(2,1), reduction = "DIM_UMAP") + coord_fixed()
We will get some high level insight into these clusters now
## TODO - move this to once clusters are identified
v1 <- VlnPlot(object = tenx.mutant.integrated, features = "nFeature_RNA", group.by = "seurat_clusters", pt.size = 0.01)
v2 <- VlnPlot(object = tenx.mutant.integrated, features = "nCount_RNA", group.by = "seurat_clusters", pt.size = 0.01)
v1 + v2
We have defined clusters, now we will identify what the clusters correspond to. We can use a number of external datasets to do this:
known marker genes
bulk RNA-seq data correlation
## make plots
plots <- FeaturePlot(tenx.mutant.integrated, features = c("PBANKA-1319500", "PBANKA-0416100"), blend = TRUE, combine = FALSE, coord.fixed = TRUE, dims = c(2,1), reduction = "DIM_UMAP")
# Get just the co-expression plot, built-in legend is meaningless for this plot
#plots[[3]] + NoLegend()
# Get just the key
#plots[[4]]
# Stitch the co-expression and key plots together
plots[[3]] + NoLegend() + plots[[4]]/plot_spacer() + plot_layout(widths = c(2,1))
marker genes plots
## find a good ring marker, to see if there is a better one than the ones reported
#markers_ring <- FindMarkers(tenx.mutant.integrated, ident.1 = c("4", "5", "16", "11", "7", "3", "9", "0", "22"))
#head(markers_ring)
# PBANKA-1319500 - CCP2 - female - used in 820 line
# PBANKA-0416100 - MG1 - dynenin heavy chain - male - used in 820 line
# PBANKA-1437500 - AP2G - commitment
# PBANKA-0831000 - MSP1 - late asexual
# PBANKA-1102200 - MSP8 - early asexual (from Bozdech paper)
# PBANKA-0711900 - HSP70 - promoter used for GFP and RFP expression in the mutants
# PBANKA-1400400 - FAMB - ring marker - discovered by looking for marker genes in data
# PBANKA-0722600 - Fam-b2 - ring marker - https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5113031/
marker_gene_plot_CCP2 <- FeaturePlot(tenx.mutant.integrated, features = "PBANKA-1319500", coord.fixed = TRUE, min.cutoff = "q1", dims = c(2,1), reduction = "DIM_UMAP", pt.size = 1, order = TRUE) +
theme_void() +
labs(title = paste("CCP2 (Female)")) +
theme(plot.title = element_text(hjust = 0.5, family="Arial", size = 20, face = "bold")) +
scale_colour_gradientn(colours=c("#DCDCDC", plasma(30)))
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
marker_gene_plot_MG1 <- FeaturePlot(tenx.mutant.integrated, features = "PBANKA-0416100", coord.fixed = TRUE, min.cutoff = "q1", dims = c(2,1), reduction = "DIM_UMAP", pt.size = 1, order = TRUE) +
theme_void() +
labs(title = paste("MG1 (Male)")) +
theme(plot.title = element_text(hjust = 0.5, family="Arial", size = 20, face = "bold")) +
scale_colour_gradientn(colours=c("#DCDCDC", plasma(30)))
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
marker_gene_plot_AP2G <- FeaturePlot(tenx.mutant.integrated, features = "PBANKA-1437500", coord.fixed = TRUE, min.cutoff = "q1", dims = c(2,1), reduction = "DIM_UMAP", pt.size = 1, order = TRUE) +
theme_void() +
labs(title = paste("AP2G (Commitment)")) +
theme(plot.title = element_text(hjust = 0.5, family="Arial", size = 20, face = "bold")) +
scale_colour_gradientn(colours=c("#DCDCDC", plasma(30)))
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
marker_gene_plot_MSP1 <- FeaturePlot(tenx.mutant.integrated, features = "PBANKA-0831000", coord.fixed = TRUE, min.cutoff = "q1", dims = c(2,1), reduction = "DIM_UMAP", pt.size = 1, order = TRUE) +
theme_void() +
labs(title = paste("MSP1 (Schizont)")) +
theme(plot.title = element_text(hjust = 0.5, family="Arial", size = 20, face = "bold")) +
scale_colour_gradientn(colours=c("#DCDCDC", plasma(30)))
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
marker_gene_plot_MSP8 <- FeaturePlot(tenx.mutant.integrated, features = "PBANKA-1102200", coord.fixed = TRUE, min.cutoff = "q1", dims = c(2,1), reduction = "DIM_UMAP", pt.size = 1, order = TRUE) +
theme_void() +
labs(title = paste("MSP8 (Asexual)")) +
theme(plot.title = element_text(hjust = 0.5, family="Arial", size = 20, face = "bold")) +
scale_colour_gradientn(colours=c("#DCDCDC", plasma(30)))
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
marker_gene_plot_SBP1 <- FeaturePlot(tenx.mutant.integrated, features = "PBANKA-1101300", coord.fixed = TRUE, min.cutoff = "q1", dims = c(2,1), reduction = "DIM_UMAP", pt.size = 1, order = TRUE) +
theme_void() +
labs(title = paste("SBP1 (Ring)")) +
theme(plot.title = element_text(hjust = 0.5, family="Arial", size = 20, face = "bold")) +
scale_colour_gradientn(colours=c("#DCDCDC", plasma(30)))
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
marker_gene_plot_FAMB <- FeaturePlot(tenx.mutant.integrated, features = "PBANKA-0722600", coord.fixed = TRUE, min.cutoff = "q1", dims = c(2,1), reduction = "DIM_UMAP", pt.size = 1, order = TRUE) +
theme_void() +
labs(title = paste("Fam-b2 (Ring)")) +
theme(plot.title = element_text(hjust = 0.5, family="Arial", size = 20, face = "bold")) +
scale_colour_gradientn(colours=c("#DCDCDC", plasma(30)))
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
marker_gene_plot_HSP70 <- FeaturePlot(tenx.mutant.integrated, features = "PBANKA-0711900", coord.fixed = TRUE, min.cutoff = "q1", dims = c(2,1), reduction = "DIM_UMAP", pt.size = 1, order = TRUE) +
theme_void() +
labs(title = paste("(HSP70; Reporter)","\n", "PBANKA_0711900")) +
theme(plot.title = element_text(hjust = 0.5, family="Arial", size = 20, face = "bold")) +
scale_colour_gradientn(colours=c("#DCDCDC", plasma(30)))
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
##original label:
# labs(title = paste("(CCP2; Female)","\n", "PBANKA_1319500"))
## plot
## cowplot method
marker_gene_plot_all <- plot_grid(marker_gene_plot_FAMB, marker_gene_plot_MSP8, marker_gene_plot_MSP1, marker_gene_plot_AP2G, marker_gene_plot_CCP2, marker_gene_plot_MG1, marker_gene_plot_HSP70, nrow=3)
font family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font database
marker_gene_plot_all
## patchwork method
#marker_gene_plot_FAMB + marker_gene_plot_MSP8 + marker_gene_plot_MSP1 + marker_gene_plot_AP2G + marker_gene_plot_CCP2 + marker_gene_plot_MG1 + marker_gene_plot_HSP70
# PBANKA-1319500 - CCP2 - female - used in 820 line
# PBANKA-0416100 - MG1 - dynenin heavy chain - male - used in 820 line
# PBANKA-1437500 - AP2G - commitment
# PBANKA-0831000 - MSP1 - late asexual
# PBANKA-1102200 - MSP8 - early asexual (from Bozdech paper)
# PBANKA-0711900 - HSP70 - promoter used for GFP and RFP expression in the mutants
# PBANKA-1400400 - FAMB - ring marker - discovered by looking for marker genes in data
# PBANKA-0722600 - Fam-b2 - ring marker - https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5113031/
markers_list <- c("PBANKA-1319500", "PBANKA-0416100", "PBANKA-1437500", "PBANKA-0831000", "PBANKA-1102200", "PBANKA-0711900", "PBANKA-1400400", "PBANKA-0722600")
list_of_density_plots <- plot_density(tenx.mutant.integrated, markers_list, joint = FALSE, combine = FALSE, dims = c(2,1), pal = "plasma")
list_of_density_plots[[1]] + coord_fixed() + list_of_density_plots[[2]] + list_of_density_plots[[3]] + list_of_density_plots[[4]] + list_of_density_plots[[5]] + list_of_density_plots[[6]] + list_of_density_plots[[7]] + list_of_density_plots[[8]]
#plot_density(tenx.mutant.integrated, "PBANKA-0416100")
Mutant cells could have aberant expression so we will use only wild-type cells:
make a metadata column where the 10X data is classified as a WT genotype
## get cells that are filtered out
cells_10x <- which(tenx.mutant.integrated@meta.data$experiment == "tenx_5k")
## make extra column in plotting df
tenx.mutant.integrated@meta.data$genotype_combined <- tenx.mutant.integrated@meta.data$genotype
tenx.mutant.integrated@meta.data$genotype_combined[cells_10x] <- "WT"
## inspect
table(tenx.mutant.integrated@meta.data$genotype_combined)
Mutant WT
2028 6880
plot_density(tenx.mutant.integrated.wt, markers_list, joint = FALSE, dims = c(2,1), pal = "plasma")
Then define each cluster as Male, Female or Asexual:
## copy clusters to new column
tenx.mutant.integrated@meta.data$cluster_colours_figure <- NA
## define which clusters will be which identity
male_clusters <- c("22", "18", "9", "16")
female_clusters <- c("19","24","20","3")
asex_clusters <- c("8","1","0","4","2","6","7","5","12","14","15","10","23","13","17","21","25")
bipotential_clusters <- c("11")
## check length of the unique entries in the manualy created list above and the number of clusters in total
paste("Is the total number of clusters in the list the same as the number of clusters in the slot?", identical(length(unique(c(male_clusters, female_clusters, asex_clusters, bipotential_clusters))), length(levels(tenx.mutant.integrated@meta.data$seurat_clusters))))
[1] "Is the total number of clusters in the list the same as the number of clusters in the slot? TRUE"
## change the column IDs
tenx.mutant.integrated@meta.data$cluster_colours_figure[which(tenx.mutant.integrated@meta.data$seurat_clusters %in% male_clusters)] <- "Male"
tenx.mutant.integrated@meta.data$cluster_colours_figure[which(tenx.mutant.integrated@meta.data$seurat_clusters %in% female_clusters)] <- "Female"
tenx.mutant.integrated@meta.data$cluster_colours_figure[which(tenx.mutant.integrated@meta.data$seurat_clusters %in% asex_clusters)] <- "Asexual"
tenx.mutant.integrated@meta.data$cluster_colours_figure[which(tenx.mutant.integrated@meta.data$seurat_clusters %in% bipotential_clusters)] <- "Bipotential"
table(tenx.mutant.integrated@meta.data$cluster_colours_figure)
Asexual Bipotential Female Male
6934 233 954 787
useful tools for all plots
## define male and female symbol
female_symbol <- intToUtf8(9792)
male_symbol <- intToUtf8(9794)
## make a custom pal
# 1 = blue - "#0052c5"
# 2 = red - "#a52b1e"
# 3 = green - "#016c00"
# 4 = yellow - "#ffe400"
pal_sex <- c("#0052c5","#ffe400", "#a52b1e", "#016c00")
UMAP_identity <- DimPlot(tenx.mutant.integrated, label = FALSE, repel = TRUE, pt.size = 0.5, group.by = "cluster_colours_figure", dims = c(2,1), reduction = "DIM_UMAP") +
coord_fixed() +
scale_colour_manual(values=pal_sex) +
theme_void() +
theme(legend.position = "none")
## print
UMAP_identity
save
ggsave("../images_to_export/ALLCELLS_merge_UMAP_identity.png", plot = UMAP_identity, device = "png", path = NULL, scale = 1, width = 20, height = 20, units = "cm", dpi = 300, limitsize = TRUE)
## Plot
umap_cluster <- DimPlot(tenx.mutant.integrated, label = TRUE, label.size = 8, repel = FALSE, pt.size = 0.5, dims = c(2,1), reduction = "DIM_UMAP") +
coord_fixed() +
theme(legend.position="bottom",
axis.line=element_blank(),
axis.text.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks=element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank()) +
guides(colour=guide_legend(nrow = 3, byrow = TRUE, override.aes = list(size=10)))
## print
umap_cluster
save
ggsave("../images_to_export/ALLCELLS_merge_UMAP_cluster.png", plot = umap_cluster, device = "png", path = NULL, scale = 1, width = 20, height = 20, units = "cm", dpi = 300, limitsize = TRUE)
## make plots
## hoo dataset correlation
UMAP_hoo <- DimPlot(tenx.mutant.integrated, label = FALSE, repel = TRUE, pt.size = 0.1, group.by = "Prediction.Spearman.", dims = c(2,1), reduction = "DIM_UMAP") +
coord_fixed() +
theme_void() +
labs(title = paste("Hoo Predicted Timepoint")) +
theme(plot.title = element_text(hjust = 0.5, family="Arial", size = 20, face = "bold")) +
scale_colour_manual(values = inferno(12)) +
labs(colour = "hour") +
theme(legend.position = "bottom", legend.title=element_text(size=10))
## ap2g timecourse in this paper correlation
UMAP_kasia <- DimPlot(tenx.mutant.integrated, label = FALSE, repel = TRUE, pt.size = 0.1, group.by = "Prediction.Spearman._Kasia", dims = c(2,1), reduction = "DIM_UMAP") +
coord_fixed() +
theme_void() +
labs(title = paste("AP2G Timecourse Predicted Timepoint")) +
theme(plot.title = element_text(hjust = 0.5, family="Arial", size = 20, face = "bold")) +
scale_colour_manual(values = inferno(10)) +
labs(colour = "hour") +
theme(legend.position = "bottom", legend.title=element_text(size=10))
## combine
umap_bulk <- wrap_plots(UMAP_hoo, UMAP_kasia, ncol = 2)
## print
umap_bulk
ggsave("../images_to_export/ALLCELLS_merge_umap_bulk_prediction.png", plot = umap_bulk, device = "png", path = NULL, scale = 1, width = 30, height = 10, units = "cm", dpi = 300, limitsize = TRUE)
The original method of plotting by experiment does not allow much customisation of the plots. I.e. we cannot easily change the titles of each plot
## Plot
DimPlot(tenx.mutant.integrated, label = TRUE, repel = TRUE, pt.size = 0.5, split.by = "experiment", dims = c(2,1), reduction = "DIM_UMAP") +
coord_fixed() +
theme(legend.position="bottom", axis.line=element_blank(),axis.text.x=element_blank(),
axis.text.y=element_blank(),axis.ticks=element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank())
But, we can use the following code to do this
## make an extra meta.data column so you can split the object by SS2 mutant, SS2 WT, 10X
## make new column in meta.data
tenx.mutant.integrated@meta.data$sub_genotype <- tenx.mutant.integrated@meta.data$genotype
## replace NA values from 10X data with a value
tenx.mutant.integrated@meta.data$sub_genotype[is.na(tenx.mutant.integrated@meta.data$sub_genotype)] <- "10X_WT"
## check
table(tenx.mutant.integrated@meta.data$sub_genotype)
10X_WT Mutant WT
6191 2028 689
## split seurat object up
ob.list <- SplitObject(tenx.mutant.integrated, split.by = "sub_genotype")
## make plots for each object
plot.list <- lapply(X = ob.list, FUN = function(x) {
DimPlot(x, dims = c(2,1), reduction = "DIM_UMAP", label = FALSE, label.size = 5, repel = TRUE, pt.size = 1) + theme(legend.position="bottom")
})
## use this function to extract legend:
## source: https://stackoverflow.com/questions/13649473/add-a-common-legend-for-combined-ggplots
## source: https://github.com/hadley/ggplot2/wiki/Share-a-legend-between-two-ggplot2-graphs
g_legend<-function(a.gplot){
tmp <- ggplot_gtable(ggplot_build(a.gplot))
leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
legend <- tmp$grobs[[leg]]
return(legend)}
## make plots pretty
p1 <- plot.list$`10X_WT` + theme_void() + guides(colour=guide_legend(nrow=2,byrow=TRUE, override.aes = list(size=4)))
p2 <- plot.list$WT + theme_void()
p3 <- plot.list$Mutant + theme_void()
## get legend
mylegend<-g_legend(p1)
## make a final plot
p4 <- grid.arrange(arrangeGrob(p1 + theme(legend.position="none") + labs(title = paste("10X", "\n", "(wild-type)")) + theme(plot.title = element_text(hjust = 0.5)),
p2 + theme(legend.position="none") + labs(title = paste("Smart-seq2", "\n", "(wild-type)")) + theme(plot.title = element_text(hjust = 0.5)),
p3 + theme(legend.position="none") + labs(title = paste("Smart-seq2", "\n", "(mutant)")) + theme(plot.title = element_text(hjust = 0.5)), nrow=1),
mylegend, nrow=2,heights=c(10, 1))
Make final plots:
p1 <- plot.list$`10X_WT` +
coord_fixed() +
theme_void() +
scale_color_manual(values=c(replicate(45, "#999999"))) +
labs(title = paste("10X (wild-type)")) +
theme(legend.position = "none", plot.title = element_text(hjust = 0.5, family="Arial", size = 13, face = "bold"))
p2 <- plot.list$WT +
coord_fixed() +
theme_void() +
scale_color_manual(values=c(replicate(46, "#999999"))) +
labs(title = paste("Smart-seq2 (wild-type)")) +
theme(legend.position = "none", plot.title = element_text(hjust = 0.5, family="Arial", size = 13, face = "bold"))
p3 <- plot.list$Mutant +
coord_fixed() +
theme_void() +
scale_color_manual(values=c(replicate(46, "#999999"))) +
labs(title = paste("Smart-seq2 (mutant)")) +
theme(legend.position = "none", plot.title = element_text(hjust = 0.5, family="Arial", size = 13, face = "bold"))
composition_umap_all <- p1 + p2 + p3
composition_umap_all
save
ggsave("../images_to_export/ALLCELLS_composition_umap.png", plot = composition_umap_all, device = "png", path = NULL, scale = 1, width = 20, height = 20, units = "cm", dpi = 300, limitsize = TRUE)
## make composite plot
UMAP_composite <- wrap_plots(marker_gene_plot_FAMB , marker_gene_plot_MSP8 , marker_gene_plot_MSP1 , marker_gene_plot_AP2G , marker_gene_plot_CCP2 , marker_gene_plot_MG1 , p1 , p2 , p3, ncol = 3)
## print
UMAP_composite
save
```r
ggsave(\../images_to_export/merge_umap_technology_and_markers.png\, plot = UMAP_composite, device = \png\, path = NULL, scale = 1, width = 30, height = 30, units = \cm\, dpi = 300, limitsize = TRUE)
<!-- rnb-source-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
#### Fig. Sup. Look at specific mutants
All the mutant genotypes profiled were:
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuIyMgbWFrZSBhIGxpc3Qgb2YgcG9zc2libGUgZ2Vub3R5cGVzXG51bmlxdWUodGVueC5tdXRhbnQuaW50ZWdyYXRlZEBtZXRhLmRhdGEkaWRlbnRpdHlfdXBkYXRlZClcbmBgYCJ9 -->
```r
## make a list of possible genotypes
unique(tenx.mutant.integrated@meta.data$identity_updated)
[1] NA "GCSKO-oom" "WT" "GCSKO-29" "GCSKO-21" "GCSKO-17" "GCSKO-2" "GCSKO-19"
[9] "GCSKO-20" "GCSKO-13" "GCSKO-10_820" "GCSKO-3"
unique(tenx.mutant.integrated@meta.data$identity_name_updated)
[1] NA "md1" "wild-type" "md2" "fd1" "fd3" "md4" "md5" "fd4" "fd2"
[11] "md3" "gd1"
##colour pal:
#31206E - purple
#E2E2E2 - grey
# PBANKA-0828000 GCSKO-3 GD1
# PBANKA-1302700 GCSKO-oom MD1
# PBANKA-1447900 GCSKO-29 MD2
# PBANKA-0413400 GCSKO-10_820 MD3
# PBANKA-0102400 GCSKO-2 MD4
# PBANKA-0716500 GCSKO-19 MD5
# PBANKA-1454800 GCSKO-21 FD1
# PBANKA-0902300 GCSKO-13 FD2
# PBANKA-1418100 GCSKO-17 FD3
# PBANKA-1435200 GCSKO-20 FD4
## ~ TODO ~ MAKE INTO A FOR LOOP
## make lists for each genotype
cells_md1 <- rownames(tenx.mutant.integrated@meta.data[which(tenx.mutant.integrated@meta.data$identity_name_updated == "md1"), ])
cells_md2 <- rownames(tenx.mutant.integrated@meta.data[which(tenx.mutant.integrated@meta.data$identity_name_updated == "md2"), ])
cells_md3 <- rownames(tenx.mutant.integrated@meta.data[which(tenx.mutant.integrated@meta.data$identity_name_updated == "md3"), ])
cells_md4 <- rownames(tenx.mutant.integrated@meta.data[which(tenx.mutant.integrated@meta.data$identity_name_updated == "md4"), ])
cells_md5 <- rownames(tenx.mutant.integrated@meta.data[which(tenx.mutant.integrated@meta.data$identity_name_updated == "md5"), ])
cells_gd1 <- rownames(tenx.mutant.integrated@meta.data[which(tenx.mutant.integrated@meta.data$identity_name_updated == "gd1"), ])
cells_fd1 <- rownames(tenx.mutant.integrated@meta.data[which(tenx.mutant.integrated@meta.data$identity_name_updated == "fd1"), ])
cells_fd2 <- rownames(tenx.mutant.integrated@meta.data[which(tenx.mutant.integrated@meta.data$identity_name_updated == "fd2"), ])
cells_fd3 <- rownames(tenx.mutant.integrated@meta.data[which(tenx.mutant.integrated@meta.data$identity_name_updated == "fd3"), ])
cells_fd4 <- rownames(tenx.mutant.integrated@meta.data[which(tenx.mutant.integrated@meta.data$identity_name_updated == "fd4"), ])
## make plots
pm2 <- DimPlot(tenx.mutant.integrated, label = FALSE, repel = TRUE, pt.size = 0.1, cells.highlight = cells_fd3, group.by = "exclude_for_sex_ratio", dims = c(2,1), reduction = "DIM_UMAP") +
coord_fixed() +
scale_color_manual(values=c("#E2E2E2", "#31206E")) +
theme_void() +
labs(title = paste("fd3","\n", "(PBANKA_1418100)")) +
theme(plot.title = element_text(hjust = 0.5, family="Arial", size = 15, face = "bold"), legend.position = "none") +
## add sex symbols
annotate("text", x = 3.8, y = 1.5, label = male_symbol, size=7, color="gray") +
annotate("text", x = 2, y = 2.8, label = female_symbol, size=7, color="gray")
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
pm3 <- DimPlot(tenx.mutant.integrated, label = FALSE, repel = TRUE, pt.size = 0.1, cells.highlight = cells_md4, group.by = "exclude_for_sex_ratio", dims = c(2,1), reduction = "DIM_UMAP") +
coord_fixed() +
scale_color_manual(values=c("#E2E2E2", "#31206E")) +
theme_void() +
labs(title = paste("md4","\n", "(PBANKA_0102400)")) +
theme(plot.title = element_text(hjust = 0.5, family="Arial", size = 15, face = "bold"), legend.position = "none") +
## add sex symbols
annotate("text", x = 3.8, y = 1.5, label = male_symbol, size=7, color="gray") +
annotate("text", x = 2, y = 2.8, label = female_symbol, size=7, color="gray")
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
pm4 <- DimPlot(tenx.mutant.integrated, label = FALSE, repel = TRUE, pt.size = 0.1, cells.highlight = cells_md5, group.by = "exclude_for_sex_ratio", dims = c(2,1), reduction = "DIM_UMAP") +
coord_fixed() +
scale_color_manual(values=c("#E2E2E2", "#31206E")) +
theme_void() +
labs(title = paste("md5","\n", "(PBANKA_0716500)")) +
theme(plot.title = element_text(hjust = 0.5, family="Arial", size = 15, face = "bold"), legend.position = "none") +
## add sex symbols
annotate("text", x = 3.8, y = 1.5, label = male_symbol, size=7, color="gray") +
annotate("text", x = 2, y = 2.8, label = female_symbol, size=7, color="gray")
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
pm5 <- DimPlot(tenx.mutant.integrated, label = FALSE, repel = TRUE, pt.size = 0.1, cells.highlight = cells_fd4, group.by = "exclude_for_sex_ratio", dims = c(2,1), reduction = "DIM_UMAP") +
coord_fixed() +
scale_color_manual(values=c("#E2E2E2", "#31206E")) +
theme_void() +
labs(title = paste("fd4","\n", "(PBANKA_1435200)")) +
theme(plot.title = element_text(hjust = 0.5, family="Arial", size = 15, face = "bold"), legend.position = "none") +
## add sex symbols
annotate("text", x = 3.8, y = 1.5, label = male_symbol, size=7, color="gray") +
annotate("text", x = 2, y = 2.8, label = female_symbol, size=7, color="gray")
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
pm6 <- DimPlot(tenx.mutant.integrated, label = FALSE, repel = TRUE, pt.size = 0.1, cells.highlight = cells_fd2, group.by = "exclude_for_sex_ratio", dims = c(2,1), reduction = "DIM_UMAP") +
coord_fixed() +
scale_color_manual(values=c("#E2E2E2", "#31206E")) +
theme_void() +
labs(title = paste("fd2","\n", "PBANKA_0902300")) +
theme(plot.title = element_text(hjust = 0.5, family="Arial", size = 15, face = "bold"), legend.position = "none") +
## add sex symbols
annotate("text", x = 3.8, y = 1.5, label = male_symbol, size=7, color="gray") +
annotate("text", x = 2, y = 2.8, label = female_symbol, size=7, color="gray")
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
pm7 <- DimPlot(tenx.mutant.integrated, label = FALSE, repel = TRUE, pt.size = 0.1, cells.highlight = cells_md3, group.by = "exclude_for_sex_ratio", dims = c(2,1), reduction = "DIM_UMAP") +
coord_fixed() +
scale_color_manual(values=c("#E2E2E2", "#31206E")) +
theme_void() +
labs(title = paste("md3","\n", "(PBANKA_0413400)")) +
theme(plot.title = element_text(hjust = 0.5, family="Arial", size = 15, face = "bold"), legend.position = "none") +
## add sex symbols
annotate("text", x = 3.8, y = 1.5, label = male_symbol, size=7, color="gray") +
annotate("text", x = 2, y = 2.8, label = female_symbol, size=7, color="gray")
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
pm8 <- DimPlot(tenx.mutant.integrated, label = FALSE, repel = TRUE, pt.size = 0.1, cells.highlight = cells_gd1, group.by = "exclude_for_sex_ratio", dims = c(2,1), reduction = "DIM_UMAP") +
coord_fixed() +
scale_color_manual(values=c("#E2E2E2", "#31206E")) +
theme_void() +
labs(title = paste("gd1","\n", "(PBANKA_0828000)")) +
theme(plot.title = element_text(hjust = 0.5, family="Arial", size = 15, face = "bold"), legend.position = "none") +
## add sex symbols
annotate("text", x = 3.8, y = 1.5, label = male_symbol, size=7, color="gray") +
annotate("text", x = 2, y = 2.8, label = female_symbol, size=7, color="gray")
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
pm9 <- DimPlot(tenx.mutant.integrated, label = FALSE, repel = TRUE, pt.size = 0.1, cells.highlight = cells_md1, group.by = "exclude_for_sex_ratio", dims = c(2,1), reduction = "DIM_UMAP") +
coord_fixed() +
scale_color_manual(values=c("#E2E2E2", "#31206E")) +
theme_void() +
labs(title = paste("md1","\n", "(PBANKA_1302700)")) +
theme(plot.title = element_text(hjust = 0.5, family="Arial", size = 15, face = "bold"), legend.position = "none") +
## add sex symbols
annotate("text", x = 3.8, y = 1.5, label = male_symbol, size=7, color="gray") +
annotate("text", x = 2, y = 2.8, label = female_symbol, size=7, color="gray")
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
pm10 <- DimPlot(tenx.mutant.integrated, label = FALSE, repel = TRUE, pt.size = 0.1, cells.highlight = cells_md2, group.by = "exclude_for_sex_ratio", dims = c(2,1), reduction = "DIM_UMAP") +
coord_fixed() +
scale_color_manual(values=c("#E2E2E2", "#31206E")) +
theme_void() +
labs(title = paste("md2","\n", "(PBANKA_1447900)")) +
theme(plot.title = element_text(hjust = 0.5, family="Arial", size = 15, face = "bold"), legend.position = "none") +
## add sex symbols
annotate("text", x = 3.8, y = 1.5, label = male_symbol, size=7, color="gray") +
annotate("text", x = 2, y = 2.8, label = female_symbol, size=7, color="gray")
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
pm11 <- DimPlot(tenx.mutant.integrated, label = FALSE, repel = TRUE, pt.size = 0.1, cells.highlight = cells_fd1, group.by = "exclude_for_sex_ratio", dims = c(2,1), reduction = "DIM_UMAP") +
coord_fixed() +
scale_color_manual(values=c("#E2E2E2", "#31206E")) +
theme_void() +
labs(title = paste("fd1","\n", "(PBANKA_1454800)")) +
theme(plot.title = element_text(hjust = 0.5, family="Arial", size = 15, face = "bold"), legend.position = "none") +
## add sex symbols
annotate("text", x = 3.8, y = 1.5, label = male_symbol, size=7, color="gray") +
annotate("text", x = 2, y = 2.8, label = female_symbol, size=7, color="gray")
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
## plot composite plot
## not used as outside plots have odd sizes
#pm1 + pm2 + pm4 + pm5 + pm11 + pm7 + pm6 + pm8 + pm9 + pm10 + pm3
## plot composite plot
mutant_cell_locations <- plot_grid(pm8 + theme(plot.margin = unit(c(0, 0, 0, 0), "cm")),
pm9 + theme(plot.margin = unit(c(0, 0, 0, 0), "cm")),
pm10 + theme(plot.margin = unit(c(0, 0, 0, 0), "cm")),
pm7 + theme(plot.margin = unit(c(0, 0, 0, 0), "cm")),
pm3 + theme(plot.margin = unit(c(0, 0, 0, 0), "cm")),
pm11 + theme(plot.margin = unit(c(0, 0, 0, 0), "cm")),
pm6 + theme(plot.margin = unit(c(0, 0, 0, 0), "cm")),
pm2 + theme(plot.margin = unit(c(0, 0, 0, 0), "cm")),
pm5 + theme(plot.margin = unit(c(0, 0, 0, 0), "cm")),
pm4 + theme(plot.margin = unit(c(0, 0, 0, 0), "cm")),
nrow = 2)
font family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font databasefont family 'Arial' not found in PostScript font database
## print
mutant_cell_locations
save
ggsave("../images_to_export/ALLCELLS_merge_umap_mutant_cell_locations.png", plot = mutant_cell_locations, device = "png", path = NULL, scale = 1, width = 30, height = 20, units = "cm", dpi = 300, limitsize = TRUE)
Make a subsetted Seurat object of sexual cells.
Include the pre-branch too as well as any weird clusters that may have clustered out.
it’s been a while since we looked at the clusters so let’s check them out again:
## Plot
DimPlot(tenx.mutant.integrated, label = TRUE, repel = TRUE, pt.size = 0.05, group.by = "seurat_clusters", dims = c(2,1), reduction = "DIM_UMAP") + coord_fixed()
## define cells
## 2 and 0 are at the beginning of the stalk
sex_clusters <- c(bipotential_clusters, female_clusters, male_clusters, "5", "7")
## subset cells into new object
tenx.mutant.integrated.sex <- subset(tenx.mutant.integrated, idents = sex_clusters)
## inspect object
tenx.mutant.integrated.sex
An object of class Seurat
10116 features across 3020 samples within 2 assays
Active assay: integrated (5018 features, 2000 variable features)
1 other assay present: RNA
3 dimensional reductions calculated: pca, umap, DIM_UMAP
## look at original UMAP
DimPlot(tenx.mutant.integrated.sex, label = TRUE, repel = TRUE, pt.size = 0.1, split.by = "experiment", dims = c(2,1), reduction = "DIM_UMAP") + coord_fixed()
we want to remove:
## look at original UMAP
plot_sexual_subsetting <- DimPlot(tenx.mutant.integrated.sex, label = TRUE, repel = TRUE, pt.size = 0.1, dims = c(2,1), reduction = "DIM_UMAP") + coord_fixed() + geom_hline(aes(yintercept = -0.80, alpha = 5)) + geom_vline(aes(xintercept = -0.1, alpha = 5))
plot_sexual_subsetting
Look interactively:
HoverLocator(plot = plot_sexual_subsetting, information = FetchData(object = tenx.mutant.integrated.sex, vars = 'identity_name_updated'))
`arrange_()` is deprecated as of dplyr 0.7.0.
Please use `arrange()` instead.
See vignette('programming') for more help
This warning is displayed once every 8 hours.
Call `lifecycle::last_warnings()` to see where this warning was generated.the condition has length > 1 and only the first element will be usedthe condition has length > 1 and only the first element will be used`error_y.color` does not currently support multiple values.`error_x.color` does not currently support multiple values.`line.color` does not currently support multiple values.The titlefont attribute is deprecated. Use title = list(font = ...) instead.the condition has length > 1 and only the first element will be usedthe condition has length > 1 and only the first element will be used`error_y.color` does not currently support multiple values.`error_x.color` does not currently support multiple values.`line.color` does not currently support multiple values.The titlefont attribute is deprecated. Use title = list(font = ...) instead.
## extract cell embeddings
df_sex_cell_embeddings <- as.data.frame(tenx.mutant.integrated.sex@reductions[["DIM_UMAP"]]@cell.embeddings)
## subset anything lower than -0.8 in UMAP 2 and -0.1 in UMAP 1
remove_cells <- row.names(df_sex_cell_embeddings[which(df_sex_cell_embeddings$DIMUMAP_2 < -0.1 | df_sex_cell_embeddings$DIMUMAP_1 < -0.8), ])
## plot these cells
DimPlot(tenx.mutant.integrated.sex, label = FALSE, repel = TRUE, pt.size = 0.1, cells.highlight = remove_cells, dims = c(2,1), reduction = "DIM_UMAP") +
coord_fixed() +
scale_color_manual(values=c("#000000", "#f54e1e")) +
theme_void() +
labs(title = paste("cells highlighted will be removed")) +
theme(plot.title = element_text(hjust = 0.5), legend.position = "none")
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the
existing scale.
Add a meta.data column so that 10X is listed as WT:
## get cells that are filtered out
mutant_cells <- which(tenx.mutant.integrated.sex$experiment == "mutants")
## make extra column in plotting df
tenx.mutant.integrated.sex@meta.data$identity_combined <- "wild-type (10x)"
tenx.mutant.integrated.sex@meta.data$identity_combined[mutant_cells] <- tenx.mutant.integrated.sex@meta.data$identity_name_updated[mutant_cells]
tenx.mutant.integrated.sex@meta.data$identity_combined[which(tenx.mutant.integrated.sex@meta.data$identity_combined == "wild-type")] <- "wild-type (Smart-seq2)"
## make a combined genotype column too
tenx.mutant.integrated.sex@meta.data$genotype_combined <- "WT"
tenx.mutant.integrated.sex@meta.data$genotype_combined[mutant_cells] <- tenx.mutant.integrated.sex@meta.data$genotype[mutant_cells]
library("colorspace")
subset_plot_A <- DimPlot(tenx.mutant.integrated.sex,
label = TRUE,
repel = TRUE,
pt.size = 1,
dims = c(2,1),
reduction = "DIM_UMAP"
) +
coord_fixed() +
geom_hline(aes(yintercept = -0.80, alpha = 5)) +
geom_vline(aes(xintercept = -0.1, alpha = 5)) +
## remove alpha guide
scale_alpha(guide = 'none') +
## change x and y labels
labs(y = "UMAP 1", x = "UMAP 2") +
## change guide titles
guides(color=guide_legend(title="Cluster")) +
theme(text = element_text(size = 10))
subset_plot_B <- DimPlot(tenx.mutant.integrated.sex,
label = FALSE,
repel = TRUE,
pt.size = 1,
dims = c(2,1),
reduction = "DIM_UMAP",
group.by = "identity_combined") +
coord_fixed() +
geom_hline(aes(yintercept = -0.80, alpha = 5)) +
geom_vline(aes(xintercept = -0.1, alpha = 5)) +
## remove alpha guide
scale_alpha(guide = 'none') +
## change x and y labels
labs(y = "UMAP 1", x = "UMAP 2") +
scale_color_discrete_qualitative(palette = "Dark 3") +
## change guide titles
guides(color=guide_legend(title="Genotype")) +
theme(text = element_text(size = 10))
subset_plot <- subset_plot_A + subset_plot_B
subset_plot
save
ggsave("../images_to_export/ALLCELLS_subset_sexual.png", plot = subset_plot, device = "png", path = NULL, scale = 1, width = 20, height = 10, units = "cm", dpi = 300, limitsize = TRUE)
then check what the IDs of these cells are to ensure they aren’t a genuine mutant signature
tenx.mutant.integrated.sex@meta.data[rownames(tenx.mutant.integrated.sex@meta.data) %in% remove_cells, ]$identity_combined
[1] "wild-type (10x)" "fd1" "fd1" "md5" "wild-type (Smart-seq2)"
[6] "md3" "md3" "wild-type (Smart-seq2)"
Although there are a number of GCSKO-21 cells, there are still many remaining in the sex cluster above and the cells near the asexual cycle also have a GCSKO-17 cell with them and are therefore not exclusively belonging to that mutant so we will remove these cells.
## make keep cells from the remove_cells
## make the not in function
'%ni%' <- Negate('%in%')
keep_cells <- colnames(tenx.mutant.integrated.sex)[which(colnames(tenx.mutant.integrated.sex) %ni% remove_cells)]
## subset
tenx.mutant.integrated.sex <- subset(tenx.mutant.integrated.sex, cells = keep_cells)
## inspect
tenx.mutant.integrated.sex
An object of class Seurat
10116 features across 3012 samples within 2 assays
Active assay: integrated (5018 features, 2000 variable features)
1 other assay present: RNA
3 dimensional reductions calculated: pca, umap, DIM_UMAP
copy old clusters over
## copy old clusters
tenx.mutant.integrated.sex <- AddMetaData(tenx.mutant.integrated.sex, tenx.mutant.integrated.sex@meta.data$seurat_clusters, col.name = "post_integration_clusters")
Save environment
```r
## This saves everything in the global environment for easy recall later
#save.image(file = \GCSKO_merge.RData\)
#load(file = \GCSKO_merge.RData\)
<!-- rnb-source-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
Save object(s)
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuIyMgU2F2ZSBhbiBvYmplY3QgdG8gYSBmaWxlXG5zYXZlUkRTKHRlbngubXV0YW50LmludGVncmF0ZWQuc2V4LCBmaWxlID0gXCIuLi9kYXRhX3RvX2V4cG9ydC90ZW54Lm11dGFudC5pbnRlZ3JhdGVkLnNleC5SRFNcIilcbiMjIFJlc3RvcmUgdGhlIG9iamVjdFxuI3JlYWRSRFMoZmlsZSA9IFwiLi4vZGF0YV90b19leHBvcnQvdGVueC5tdXRhbnQuaW50ZWdyYXRlZC5zZXguUkRTXCIpXG5cbiMjIHNhdmUgaW50ZWdyYXRlZCBvYmplY3QgdG8gZmlsZVxuc2F2ZVJEUyh0ZW54Lm11dGFudC5pbnRlZ3JhdGVkLCBmaWxlID0gXCIuLi9kYXRhX3RvX2V4cG9ydC90ZW54Lm11dGFudC5pbnRlZ3JhdGVkLlJEU1wiKSBcbiMjIHJlc3RvcmUgdGhlIG9iamVjdFxuI3RlbngubXV0YW50LmludGVncmF0ZWQgPC0gcmVhZFJEUyhcIi4uL2RhdGFfdG9fZXhwb3J0L3RlbngubXV0YW50LmludGVncmF0ZWQuUkRTXCIpXG5gYGAifQ== -->
```r
## Save an object to a file
saveRDS(tenx.mutant.integrated.sex, file = "../data_to_export/tenx.mutant.integrated.sex.RDS")
## Restore the object
#readRDS(file = "../data_to_export/tenx.mutant.integrated.sex.RDS")
## save integrated object to file
saveRDS(tenx.mutant.integrated, file = "../data_to_export/tenx.mutant.integrated.RDS")
## restore the object
#tenx.mutant.integrated <- readRDS("../data_to_export/tenx.mutant.integrated.RDS")
R version 4.0.3 (2020-10-10)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Catalina 10.15.7
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
locale:
[1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
attached base packages:
[1] stats4 parallel grid stats graphics grDevices utils datasets methods base
other attached packages:
[1] colorspace_1.4-1 monocle3_0.2.3.0 SingleCellExperiment_1.10.1 SummarizedExperiment_1.18.2
[5] DelayedArray_0.14.1 matrixStats_0.57.0 GenomicRanges_1.40.0 GenomeInfoDb_1.24.2
[9] IRanges_2.22.2 S4Vectors_0.26.1 Biobase_2.48.0 BiocGenerics_0.34.0
[13] Nebulosa_1.2.0 dplyr_1.0.2 reshape2_1.4.4 Hmisc_4.4-1
[17] ggplot2_3.3.2 Formula_1.2-4 survival_3.2-7 lattice_0.20-41
[21] gridExtra_2.3 cowplot_1.1.0 Seurat_3.2.2 viridis_0.5.1
[25] viridisLite_0.3.0 patchwork_1.0.1 slingshot_1.6.1 princurve_2.1.6
loaded via a namespace (and not attached):
[1] backports_1.1.10 plyr_1.8.6 igraph_1.2.6 lazyeval_0.2.2 sp_1.4-4
[6] splines_4.0.3 crosstalk_1.1.0.1 listenv_0.8.0 usethis_1.6.3 digest_0.6.27
[11] htmltools_0.5.1.1 fansi_0.4.1 checkmate_2.0.0 magrittr_2.0.1 memoise_1.1.0
[16] gganimate_1.0.7 tensor_1.5 cluster_2.1.0 ks_1.12.0 ROCR_1.0-11
[21] limma_3.44.3 remotes_2.2.0 globals_0.13.1 prettyunits_1.1.1 jpeg_0.1-8.1
[26] ggrepel_0.8.2 xfun_0.18 callr_3.5.1 crayon_1.3.4 RCurl_1.98-1.2
[31] jsonlite_1.7.1 spatstat_1.64-1 spatstat.data_1.4-3 zoo_1.8-8 ape_5.4-1
[36] glue_1.4.2 polyclip_1.10-0 gtable_0.3.0 zlibbioc_1.34.0 XVector_0.28.0
[41] leiden_0.3.3 pkgbuild_1.1.0 future.apply_1.6.0 abind_1.4-5 scales_1.1.1
[46] pheatmap_1.0.12 mvtnorm_1.1-1 miniUI_0.1.1.1 Rcpp_1.0.6 htmlTable_2.1.0
[51] xtable_1.8-4 progress_1.2.2 reticulate_1.18 foreign_0.8-80 rsvd_1.0.3
[56] mclust_5.4.7 htmlwidgets_1.5.2 httr_1.4.2 RColorBrewer_1.1-2 ellipsis_0.3.1
[61] ica_1.0-2 pkgconfig_2.0.3 farver_2.0.3 nnet_7.3-14 uwot_0.1.8
[66] deldir_0.1-29 labeling_0.4.2 tidyselect_1.1.0 rlang_0.4.8 later_1.1.0.1
[71] munsell_0.5.0 tools_4.0.3 cli_2.1.0 generics_0.0.2 gifski_0.8.6
[76] devtools_2.3.2 ggridges_0.5.2 evaluate_0.14 stringr_1.4.0 fastmap_1.0.1
[81] yaml_2.2.1 goftest_1.2-2 knitr_1.30 fs_1.5.0 processx_3.4.4
[86] fitdistrplus_1.1-1 purrr_0.3.4 RANN_2.6.1 pbapply_1.4-3 future_1.19.1
[91] nlme_3.1-149 mime_0.9 compiler_4.0.3 rstudioapi_0.11 plotly_4.9.2.1
[96] png_0.1-7 testthat_2.3.2 spatstat.utils_1.17-0 tibble_3.0.4 tweenr_1.0.1
[101] stringi_1.5.3 ps_1.4.0 RSpectra_0.16-0 desc_1.2.0 Matrix_1.2-18
[106] vctrs_0.3.4 pillar_1.4.6 lifecycle_0.2.0 BiocManager_1.30.10 lmtest_0.9-38
[111] RcppAnnoy_0.0.16 data.table_1.13.2 bitops_1.0-6 irlba_2.3.3 raster_3.3-13
[116] httpuv_1.5.4 latticeExtra_0.6-29 R6_2.5.0 promises_1.1.1 KernSmooth_2.23-17
[121] sessioninfo_1.1.1 codetools_0.2-16 MASS_7.3-53 assertthat_0.2.1 pkgload_1.1.0
[126] rprojroot_1.3-2 withr_2.3.0 sctransform_0.3.1 GenomeInfoDbData_1.2.3 mgcv_1.8-33
[131] hms_0.5.3 rpart_4.1-15 tidyr_1.1.2 rmarkdown_2.5 Rtsne_0.15
[136] base64enc_0.1-3 shiny_1.5.0
– Subset only 10X cells
– cluster 24 is predetermination cells – cluster 29 is post cells – cluster 36 is post cells
```r
## Subset 10X Dataset, cluster 24
## extract only cells in cluster 24
seurat.object.subset <- SubsetData(tenx.mutant.integrated, subset.name = \seurat_clusters\, accept.value = c(\24\))
## get the names of the cells in cluster of interest
names_of_cells_in_cluster_24 <- colnames(seurat.object.subset@assays$RNA@counts)
## subset seurat
tenx_cluster_24 <- SubsetData(pb_sex_filtered, cells = names_of_cells_in_cluster_24)
## extract data
tenx_cluster_24_matrix_data <- as(as.matrix(GetAssayData(tenx_cluster_24, assay = \RNA\, slot = \data\)), 'sparseMatrix')
## extract counts
tenx_cluster_24_matrix_counts <- as(as.matrix(GetAssayData(tenx_cluster_24, assay = \RNA\, slot = \counts\)), 'sparseMatrix')
## extract meta data
## make big meta data dataframe
meta_df <- data.frame(tenx.mutant.integrated.sex@meta.data)
#meta_df <- data.frame(tenx.mutant.integrated@meta.data)
tenx_cluster_24_pd <- meta_df[which(rownames(meta_df) %in% colnames(tenx_cluster_24_matrix_counts)), ]
# save all 3 files
#write.csv(tenx_cluster_24_matrix_data, file = \~/data_to_export/tenx_cluster_24_matrix_data.csv\)
#write.csv(tenx_cluster_24_matrix_counts, file = \~/data_to_export/tenx_cluster_24_matrix_counts.csv\)
write.csv(tenx_cluster_24_pd, file = \~/data_to_export/tenx_cluster_24_pd.csv\)
## Subset 10X Dataset, cluster 29
# extract only cells in cluster 29
seurat.object.subset <- SubsetData(tenx.mutant.integrated, subset.name = \seurat_clusters\, accept.value = c(\29\))
#get the names of the cells in cluster of interest
names_of_cells_in_cluster_29 <- colnames(seurat.object.subset@assays$RNA@counts)
# subset seurat
tenx_cluster_29 <- SubsetData(pb_sex_filtered, cells = names_of_cells_in_cluster_29)
# extract data
tenx_cluster_29_matrix_data <- as(as.matrix(GetAssayData(tenx_cluster_29, assay = \RNA\, slot = \data\)), 'sparseMatrix')
# extract counts
tenx_cluster_29_matrix_counts <- as(as.matrix(GetAssayData(tenx_cluster_29, assay = \RNA\, slot = \counts\)), 'sparseMatrix')
# extract meta data
tenx_cluster_29_pd <- meta_df[which(rownames(meta_df) %in% colnames(tenx_cluster_29_matrix_counts)), ]
# save all 3 files
#write.csv(tenx_cluster_29_matrix_data, file = \~/data_to_export/tenx_cluster_29_matrix_data.csv\)
#write.csv(tenx_cluster_29_matrix_counts, file = \~/data_to_export/tenx_cluster_29_matrix_counts.csv\)
write.csv(tenx_cluster_29_pd, file = \~/data_to_export/tenx_cluster_29_pd.csv\)
## Subset 10X Dataset, cluster 36
# extract only cells in cluster 36
seurat.object.subset <- SubsetData(tenx.mutant.integrated, subset.name = \seurat_clusters\, accept.value = c(\36\))
#get the names of the cells in cluster of interest
names_of_cells_in_cluster_36 <- colnames(seurat.object.subset@assays$RNA@counts)
# subset seurat
tenx_cluster_36 <- SubsetData(pb_sex_filtered, cells = names_of_cells_in_cluster_36)
# extract data
tenx_cluster_36_matrix_data <- as(as.matrix(GetAssayData(tenx_cluster_36, assay = \RNA\, slot = \data\)), 'sparseMatrix')
# extract counts
tenx_cluster_36_matrix_counts <- as(as.matrix(GetAssayData(tenx_cluster_36, assay = \RNA\, slot = \counts\)), 'sparseMatrix')
# extract meta data
tenx_cluster_36_pd <- meta_df[which(rownames(meta_df) %in% colnames(tenx_cluster_36_matrix_counts)), ]
# save all 3 files
#write.csv(tenx_cluster_36_matrix_data, file = \~/data_to_export/tenx_cluster_36_matrix_data.csv\)
#write.csv(tenx_cluster_36_matrix_counts, file = \~/data_to_export/tenx_cluster_36_matrix_counts.csv\)
write.csv(tenx_cluster_36_pd, file = \~/data_to_export/tenx_cluster_36_pd.csv\)
<!-- rnb-source-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
### old code
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuIyMgb2xkIHRocmVzaG9sZHMgb24gZGF0YSB3aXRoIDI4XG4jcmVtb3ZlX2NlbGxzIDwtIHJvdy5uYW1lcyhkZl9zZXhfY2VsbF9lbWJlZGRpbmdzW3doaWNoKGRmX3NleF9jZWxsX2VtYmVkZGluZ3MkRElNVU1BUF8yIDwgMC4xICYgZGZfc2V4X2NlbGxfZW1iZWRkaW5ncyRESU1VTUFQXzEgPiAtMS4yKSwgXSlcbmBgYCJ9 -->
```r
## old thresholds on data with 28
#remove_cells <- row.names(df_sex_cell_embeddings[which(df_sex_cell_embeddings$DIMUMAP_2 < 0.1 & df_sex_cell_embeddings$DIMUMAP_1 > -1.2), ])
Cowplot(plot_grid), patchwork(wrap_plots), and ggpubr can all allow multiple plots to be plotted together.
save
ggsave("../images_to_export/Figure_C.png", plot = Figure_publication, device = "png", path = NULL, scale = 1, width = 21, height = 29.7, units = "cm", dpi = 300, limitsize = TRUE)
save and export individual plots so it can be stitched with
Recluster dataset now that it is integrated. We will cluster with a number of resolutions to begin with to see how this affects the number and nature of the clusters.
## copy old clusters
tenx.mutant.integrated <- AddMetaData(tenx.mutant.integrated, tenx.mutant.integrated@meta.data$RNA_snn_res.1, col.name = "pre_integration_clusters")
## generate new clusters at low resolution
## 1
tenx.mutant.integrated <- FindNeighbors(tenx.mutant.integrated, dims = 1:15)
Computing nearest neighbor graph
Computing SNN
tenx.mutant.integrated <- FindClusters(tenx.mutant.integrated, resolution = 1, random.seed = 42, algorithm = 2)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 8908
Number of edges: 319840
Running Louvain algorithm with multilevel refinement...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8616
Number of communities: 21
Elapsed time: 1 seconds
## generate new clusters at low resolution
## 1.2
tenx.mutant.integrated <- FindNeighbors(tenx.mutant.integrated, dims = 1:15)
Computing nearest neighbor graph
Computing SNN
tenx.mutant.integrated <- FindClusters(tenx.mutant.integrated, resolution = 1.2, random.seed = 42, algorithm = 2)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 8908
Number of edges: 319840
Running Louvain algorithm with multilevel refinement...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8494
Number of communities: 22
Elapsed time: 2 seconds
## generate new clusters at low resolution
## 1.5
tenx.mutant.integrated <- FindNeighbors(tenx.mutant.integrated, dims = 1:15)
Computing nearest neighbor graph
Computing SNN
tenx.mutant.integrated <- FindClusters(tenx.mutant.integrated, resolution = 1.5, random.seed = 42, algorithm = 2)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 8908
Number of edges: 319840
Running Louvain algorithm with multilevel refinement...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8315
Number of communities: 24
Elapsed time: 1 seconds
## generate new clusters at mid resolution
## 2
tenx.mutant.integrated <- FindNeighbors(tenx.mutant.integrated, dims = 1:15)
Computing nearest neighbor graph
Computing SNN
tenx.mutant.integrated <- FindClusters(tenx.mutant.integrated, resolution = 2, random.seed = 42, algorithm = 2)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 8908
Number of edges: 319840
Running Louvain algorithm with multilevel refinement...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8052
Number of communities: 29
Elapsed time: 1 seconds
## generate new clusters at high resolution
## 4
tenx.mutant.integrated <- FindNeighbors(tenx.mutant.integrated, dims = 1:15)
Computing nearest neighbor graph
Computing SNN
tenx.mutant.integrated <- FindClusters(tenx.mutant.integrated, resolution = 4, random.seed = 42, algorithm = 2)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 8908
Number of edges: 319840
Running Louvain algorithm with multilevel refinement...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.7355
Number of communities: 46
Elapsed time: 2 seconds
## print identities
#head(Idents(tenx.mutant.integrated), 10)
View
## Plot
DimPlot(tenx.mutant.integrated, label = TRUE, repel = TRUE, pt.size = 0.05, dims = c(2,1), reduction = "DIM_UMAP", group.by = "integrated_snn_res.1") + coord_fixed()
Make individual plots highlighting where cells in each cluster fall
plot
## this function writes the next bit of code for you
## put it into the console and paste the response
#ploty <- c()
#for(i in seq_along(levels(tenx.mutant.integrated@meta.data$seurat_clusters))){
# ploty <- paste0(ploty, "list_UMAPs_by_cluster[[", i, "]]", " + ")
#}
## plot
list_UMAPs_by_cluster[[1]] + list_UMAPs_by_cluster[[2]] + list_UMAPs_by_cluster[[3]] + list_UMAPs_by_cluster[[4]] + list_UMAPs_by_cluster[[5]] + list_UMAPs_by_cluster[[6]] + list_UMAPs_by_cluster[[7]] + list_UMAPs_by_cluster[[8]] + list_UMAPs_by_cluster[[9]] + list_UMAPs_by_cluster[[10]] + list_UMAPs_by_cluster[[11]] + list_UMAPs_by_cluster[[12]] + list_UMAPs_by_cluster[[13]] + list_UMAPs_by_cluster[[14]] + list_UMAPs_by_cluster[[15]] + list_UMAPs_by_cluster[[16]] + list_UMAPs_by_cluster[[17]] + list_UMAPs_by_cluster[[18]] + list_UMAPs_by_cluster[[19]] + list_UMAPs_by_cluster[[20]]
View
## Plot
DimPlot(tenx.mutant.integrated, label = TRUE, repel = TRUE, pt.size = 0.05, dims = c(2,1), reduction = "DIM_UMAP", group.by = "integrated_snn_res.1.2") + coord_fixed()
Make individual plots highlighting where cells in each cluster fall
[1] 22
plot
## this function writes the next bit of code for you
## put it into the console and paste the response
#ploty <- c()
#for(i in seq_along(levels(tenx.mutant.integrated@meta.data$seurat_clusters))){
# ploty <- paste0(ploty, "list_UMAPs_by_cluster[[", i, "]]", " + ")
#}
## plot
list_UMAPs_by_cluster[[1]] + list_UMAPs_by_cluster[[2]] + list_UMAPs_by_cluster[[3]] + list_UMAPs_by_cluster[[4]] + list_UMAPs_by_cluster[[5]] + list_UMAPs_by_cluster[[6]] + list_UMAPs_by_cluster[[7]] + list_UMAPs_by_cluster[[8]] + list_UMAPs_by_cluster[[9]] + list_UMAPs_by_cluster[[10]] + list_UMAPs_by_cluster[[11]] + list_UMAPs_by_cluster[[12]] + list_UMAPs_by_cluster[[13]] + list_UMAPs_by_cluster[[14]] + list_UMAPs_by_cluster[[15]] + list_UMAPs_by_cluster[[16]] + list_UMAPs_by_cluster[[17]] + list_UMAPs_by_cluster[[18]] + list_UMAPs_by_cluster[[19]] + list_UMAPs_by_cluster[[20]] + list_UMAPs_by_cluster[[21]] + list_UMAPs_by_cluster[[22]]
## Plot
DimPlot(tenx.mutant.integrated, label = TRUE, repel = TRUE, pt.size = 0.05, dims = c(2,1), reduction = "DIM_UMAP", group.by = "integrated_snn_res.1.5") + coord_fixed()
Make individual plots highlighting where cells in each cluster fall
[1] 24
## 1.5 resolution
list_UMAPs_by_cluster[[1]] + list_UMAPs_by_cluster[[2]] + list_UMAPs_by_cluster[[3]] + list_UMAPs_by_cluster[[4]] + list_UMAPs_by_cluster[[5]] + list_UMAPs_by_cluster[[6]] + list_UMAPs_by_cluster[[7]] + list_UMAPs_by_cluster[[8]] + list_UMAPs_by_cluster[[9]] + list_UMAPs_by_cluster[[10]] + list_UMAPs_by_cluster[[11]] + list_UMAPs_by_cluster[[12]] + list_UMAPs_by_cluster[[13]] + list_UMAPs_by_cluster[[14]] + list_UMAPs_by_cluster[[15]] + list_UMAPs_by_cluster[[16]] + list_UMAPs_by_cluster[[17]] + list_UMAPs_by_cluster[[18]] + list_UMAPs_by_cluster[[19]] + list_UMAPs_by_cluster[[20]] + list_UMAPs_by_cluster[[21]] + list_UMAPs_by_cluster[[22]] + list_UMAPs_by_cluster[[23]] + list_UMAPs_by_cluster[[24]] #+ list_UMAPs_by_cluster[[25]]
View
## Plot
DimPlot(tenx.mutant.integrated, label = TRUE, repel = TRUE, pt.size = 0.05, dims = c(2,1), reduction = "DIM_UMAP", group.by = "integrated_snn_res.2") + coord_fixed()
Make individual plots highlighting where cells in each cluster fall
plot
## this function writes the next bit of code for you
## put it into the console and paste the response
#ploty <- c()
#for(i in seq_along(levels(tenx.mutant.integrated@meta.data$seurat_clusters))){
# ploty <- paste0(ploty, "list_UMAPs_by_cluster[[", i, "]]", " + ")
#}
## plot
list_UMAPs_by_cluster[[1]] + list_UMAPs_by_cluster[[2]] + list_UMAPs_by_cluster[[3]] + list_UMAPs_by_cluster[[4]] + list_UMAPs_by_cluster[[5]] + list_UMAPs_by_cluster[[6]] + list_UMAPs_by_cluster[[7]] + list_UMAPs_by_cluster[[8]] + list_UMAPs_by_cluster[[9]] + list_UMAPs_by_cluster[[10]] + list_UMAPs_by_cluster[[11]] + list_UMAPs_by_cluster[[12]] + list_UMAPs_by_cluster[[13]] + list_UMAPs_by_cluster[[14]] + list_UMAPs_by_cluster[[15]] + list_UMAPs_by_cluster[[16]] + list_UMAPs_by_cluster[[17]] + list_UMAPs_by_cluster[[18]] + list_UMAPs_by_cluster[[19]] + list_UMAPs_by_cluster[[20]] + list_UMAPs_by_cluster[[21]] + list_UMAPs_by_cluster[[22]] + list_UMAPs_by_cluster[[23]] + list_UMAPs_by_cluster[[24]] + list_UMAPs_by_cluster[[25]] + list_UMAPs_by_cluster[[26]] + list_UMAPs_by_cluster[[27]] + list_UMAPs_by_cluster[[28]] + list_UMAPs_by_cluster[[29]]# + list_UMAPs_by_cluster[[30]] + list_UMAPs_by_cluster[[31]]
8 vs 11 on resolution 2 already looks pretty cool:
## reset the default identity
## generate new clusters at mid resolution
## 2
tenx.mutant.integrated <- FindNeighbors(tenx.mutant.integrated, dims = 1:15)
Computing nearest neighbor graph
Computing SNN
tenx.mutant.integrated <- FindClusters(tenx.mutant.integrated, resolution = 2, random.seed = 42, algorithm = 2)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 8908
Number of edges: 319840
Running Louvain algorithm with multilevel refinement...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8052
Number of communities: 29
Elapsed time: 2 seconds
## Find deferentially expressed features between the two clusters
early.sex.de.markers <- FindMarkers(tenx.mutant.integrated, ident.1 = "8", ident.2 = "11")
| | 0 % ~calculating
|+ | 1 % ~05s
|++ | 3 % ~03s
|++ | 4 % ~02s
|+++ | 5 % ~02s
|++++ | 6 % ~02s
|++++ | 8 % ~02s
|+++++ | 9 % ~01s
|++++++ | 10% ~01s
|++++++ | 12% ~01s
|+++++++ | 13% ~01s
|++++++++ | 14% ~01s
|++++++++ | 15% ~01s
|+++++++++ | 17% ~01s
|+++++++++ | 18% ~01s
|++++++++++ | 19% ~01s
|+++++++++++ | 21% ~01s
|+++++++++++ | 22% ~01s
|++++++++++++ | 23% ~01s
|+++++++++++++ | 24% ~01s
|+++++++++++++ | 26% ~01s
|++++++++++++++ | 27% ~01s
|+++++++++++++++ | 28% ~01s
|+++++++++++++++ | 29% ~01s
|++++++++++++++++ | 31% ~01s
|+++++++++++++++++ | 32% ~01s
|+++++++++++++++++ | 33% ~01s
|++++++++++++++++++ | 35% ~01s
|++++++++++++++++++ | 36% ~01s
|+++++++++++++++++++ | 37% ~01s
|++++++++++++++++++++ | 38% ~01s
|++++++++++++++++++++ | 40% ~01s
|+++++++++++++++++++++ | 41% ~01s
|++++++++++++++++++++++ | 42% ~01s
|++++++++++++++++++++++ | 44% ~01s
|+++++++++++++++++++++++ | 45% ~01s
|++++++++++++++++++++++++ | 46% ~01s
|++++++++++++++++++++++++ | 47% ~01s
|+++++++++++++++++++++++++ | 49% ~01s
|+++++++++++++++++++++++++ | 50% ~01s
|++++++++++++++++++++++++++ | 51% ~01s
|+++++++++++++++++++++++++++ | 53% ~01s
|+++++++++++++++++++++++++++ | 54% ~01s
|++++++++++++++++++++++++++++ | 55% ~01s
|+++++++++++++++++++++++++++++ | 56% ~01s
|+++++++++++++++++++++++++++++ | 58% ~01s
|++++++++++++++++++++++++++++++ | 59% ~01s
|+++++++++++++++++++++++++++++++ | 60% ~01s
|+++++++++++++++++++++++++++++++ | 62% ~01s
|++++++++++++++++++++++++++++++++ | 63% ~01s
|+++++++++++++++++++++++++++++++++ | 64% ~00s
|+++++++++++++++++++++++++++++++++ | 65% ~00s
|++++++++++++++++++++++++++++++++++ | 67% ~00s
|++++++++++++++++++++++++++++++++++ | 68% ~00s
|+++++++++++++++++++++++++++++++++++ | 69% ~00s
|++++++++++++++++++++++++++++++++++++ | 71% ~00s
|++++++++++++++++++++++++++++++++++++ | 72% ~00s
|+++++++++++++++++++++++++++++++++++++ | 73% ~00s
|++++++++++++++++++++++++++++++++++++++ | 74% ~00s
|++++++++++++++++++++++++++++++++++++++ | 76% ~00s
|+++++++++++++++++++++++++++++++++++++++ | 77% ~00s
|++++++++++++++++++++++++++++++++++++++++ | 78% ~00s
|++++++++++++++++++++++++++++++++++++++++ | 79% ~00s
|+++++++++++++++++++++++++++++++++++++++++ | 81% ~00s
|++++++++++++++++++++++++++++++++++++++++++ | 82% ~00s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~00s
|+++++++++++++++++++++++++++++++++++++++++++ | 85% ~00s
|+++++++++++++++++++++++++++++++++++++++++++ | 86% ~00s
|++++++++++++++++++++++++++++++++++++++++++++ | 87% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++ | 88% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++ | 90% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++ | 91% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 92% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 94% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 95% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 96% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 97% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 99% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=01s
## order by FC
early.sex.de.markers <- early.sex.de.markers[rev(order(early.sex.de.markers$avg_logFC)),]
## view results
head(early.sex.de.markers)
# View(early.sex.de.markers)
early.sex.de.markers.screen.hits <- early.sex.de.markers[rownames(early.sex.de.markers) %in% c(screen_hits,"PBANKA-1437500"), ]
early.sex.de.markers.screen.hits
## PBANKA-1302700 - md1
## PBANKA-1437500 - Ap2G
## PBANKA-1214700 - conserved plasmodium protein
look at them across the dataset
DotPlot(tenx.mutant.integrated, features = rownames(early.sex.de.markers[1:10,])) + RotatedAxis()
DotPlot(tenx.mutant.integrated, features = screen_hits) + RotatedAxis()
## find all markers
#all.markers <- FindAllMarkers(tenx.mutant.integrated, only.pos = FALSE, min.pct = 0.25, logfc.threshold = 0.25)
top_two <- all.markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_logFC)
Error in group_by(., cluster) : object 'all.markers' not found
View
## Plot
DimPlot(tenx.mutant.integrated, label = TRUE, repel = TRUE, pt.size = 0.05, dims = c(2,1), reduction = "DIM_UMAP", group.by = "integrated_snn_res.4") + coord_fixed()
Make individual plots highlighting where cells in each cluster fall
[1] 46
plot
## this function writes the next bit of code for you
## put it into the console and paste the response
#ploty <- c()
#for(i in seq_along(levels(tenx.mutant.integrated@meta.data$seurat_clusters))){
# ploty <- paste0(ploty, "list_UMAPs_by_cluster[[", i, "]]", " + ")
#}
## plot
list_UMAPs_by_cluster[[1]] + list_UMAPs_by_cluster[[2]] + list_UMAPs_by_cluster[[3]] + list_UMAPs_by_cluster[[4]] + list_UMAPs_by_cluster[[5]] + list_UMAPs_by_cluster[[6]] + list_UMAPs_by_cluster[[7]] + list_UMAPs_by_cluster[[8]] + list_UMAPs_by_cluster[[9]] + list_UMAPs_by_cluster[[10]] + list_UMAPs_by_cluster[[11]] + list_UMAPs_by_cluster[[12]] + list_UMAPs_by_cluster[[13]] + list_UMAPs_by_cluster[[14]] + list_UMAPs_by_cluster[[15]] + list_UMAPs_by_cluster[[16]] + list_UMAPs_by_cluster[[17]] + list_UMAPs_by_cluster[[18]] + list_UMAPs_by_cluster[[19]] + list_UMAPs_by_cluster[[20]] + list_UMAPs_by_cluster[[21]] + list_UMAPs_by_cluster[[22]] + list_UMAPs_by_cluster[[23]] + list_UMAPs_by_cluster[[24]] + list_UMAPs_by_cluster[[25]] + list_UMAPs_by_cluster[[26]] + list_UMAPs_by_cluster[[27]] + list_UMAPs_by_cluster[[28]] + list_UMAPs_by_cluster[[29]] + list_UMAPs_by_cluster[[30]] + list_UMAPs_by_cluster[[31]] + list_UMAPs_by_cluster[[32]] + list_UMAPs_by_cluster[[33]] + list_UMAPs_by_cluster[[34]] + list_UMAPs_by_cluster[[35]] + list_UMAPs_by_cluster[[36]] + list_UMAPs_by_cluster[[37]] + list_UMAPs_by_cluster[[38]] + list_UMAPs_by_cluster[[39]] + list_UMAPs_by_cluster[[40]] + list_UMAPs_by_cluster[[41]] + list_UMAPs_by_cluster[[42]] + list_UMAPs_by_cluster[[43]] + list_UMAPs_by_cluster[[44]] + list_UMAPs_by_cluster[[45]] + list_UMAPs_by_cluster[[46]]
This has been replaced by looking at marker genes in wild-type only cells
# PBANKA-1319500 - CCP2 - female - used in 820 line
# PBANKA-0416100 - MG1 - dynenin heavy chain - male - used in 820 line
# PBANKA-1437500 - AP2G - commitment
# PBANKA-0831000 - MSP1 - late asexual
# PBANKA-1102200 - MSP8 - early asexual (from Bozdech paper)
# PBANKA-0711900 - HSP70 - promoter used for GFP and RFP expression in the mutants
# PBANKA-1400400 - FAMB - ring marker - discovered by looking for marker genes in data
# PBANKA-0722600 - Fam-b2 - ring marker - https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5113031/
# theme(plot.title = element_text(hjust = 0.5, family="Arial", size = 20, face = "bold")) +
markers_list <- c("PBANKA-1319500", "PBANKA-0416100", "PBANKA-1437500", "PBANKA-0831000", "PBANKA-1102200", "PBANKA-0711900", "PBANKA-1400400", "PBANKA-0722600")
list_of_density_plots <- plot_density(tenx.mutant.integrated.wt, markers_list, joint = FALSE, combine = FALSE, dims = c(2,1), pal = "plasma", method = "ks")
## make composite plot
UMAP_composite <- wrap_plots(list_of_density_plots[[8]] + coord_fixed() + theme_void() + labs(title = paste("Fam-b2 (Ring)")) + scale_colour_gradientn(colours=c("#DCDCDC", plasma(30))) + guides(colour = guide_colourbar(barwidth = 0.5, title = "")),
list_of_density_plots[[5]] + coord_fixed() + theme_void() + labs(title = paste("MSP8 (Asexual)")) + scale_colour_gradientn(colours=c("#DCDCDC", plasma(30))) + guides(colour = guide_colourbar(barwidth = 0.5, title = "")),
list_of_density_plots[[4]] + coord_fixed() + theme_void() + labs(title = paste("MSP1 (Schizont)")) + scale_colour_gradientn(colours=c("#DCDCDC", plasma(30))) + guides(colour = guide_colourbar(barwidth = 0.5, title = "")),
list_of_density_plots[[3]] + coord_fixed() + theme_void() + labs(title = paste("AP2G (Commitment)")) + scale_colour_gradientn(colours=c("#DCDCDC", plasma(30)) )+ guides(colour = guide_colourbar(barwidth = 0.5, title = "")),
list_of_density_plots[[1]] + coord_fixed() + theme_void() +
labs(title = paste("CCP2 (Female)")) + scale_colour_gradientn(colours=c("#DCDCDC", plasma(30))) + guides(colour = guide_colourbar(barwidth = 0.5, title = "")),
list_of_density_plots[[2]] + coord_fixed() + theme_void() + labs(title = paste("MG1 (Male)")) + scale_colour_gradientn(colours=c("#DCDCDC", plasma(30))) + guides(colour = guide_colourbar(barwidth = 0.5, title = "")),
p1 ,
p2 ,
p3,
ncol = 3)
UMAP_composite
Specific gene expression of mutants
# PBANKA-1418100 GCSKO-17 FD3
# PBANKA-0102400 GCSKO-2 MD3
# PBANKA-0716500 GCSKO-19 MD4
# PBANKA-1435200 GCSKO-20 FD4
# PBANKA-0902300 GCSKO-13 FD2
# PBANKA-0413400 GCSKO-10_820 MD5
# PBANKA-0828000 GCSKO-3 GD1
# PBANKA-1302700 GCSKO-oom MD1
# PBANKA-1447900 GCSKO-29 MD2
# PBANKA-1454800 GCSKO-21 FD1
# PBANKA-1144800 GCSKO-28 FD5
marker_gene_plot_17 <- FeaturePlot(tenx.mutant.integrated, dims = c(2,1), reduction = "DIM_UMAP", features = "PBANKA-1418100", coord.fixed = TRUE, min.cutoff = "q10", max.cutoff = "q95", pt.size = 1, order = TRUE) +
theme_void() +
labs(title = paste("fd3")) +
theme(plot.title = element_text(hjust = 0.5, family="Arial", size = 20, face = "bold")) +
scale_colour_gradientn(colours=c("#DCDCDC", plasma(30))) +
## add sex symbols
annotate("text", x = 3.8, y = 1.5, label = male_symbol, size=7, color="gray") +
annotate("text", x = 2, y = 2.8, label = female_symbol, size=7, color="gray")
marker_gene_plot_2 <- FeaturePlot(tenx.mutant.integrated, dims = c(2,1), reduction = "DIM_UMAP", features = "PBANKA-0102400", coord.fixed = TRUE, min.cutoff = "q10", max.cutoff = "q95", pt.size = 1, order = TRUE) +
theme_void() +
labs(title = paste("2")) +
theme(plot.title = element_text(hjust = 0.5, family="Arial", size = 20, face = "bold")) +
scale_colour_gradientn(colours=c("#DCDCDC", plasma(30))) +
## add sex symbols
annotate("text", x = 3.8, y = 1.5, label = male_symbol, size=7, color="gray") +
annotate("text", x = 2, y = 2.8, label = female_symbol, size=7, color="gray")
marker_gene_plot_19 <- FeaturePlot(tenx.mutant.integrated, dims = c(2,1), reduction = "DIM_UMAP", features = "PBANKA-0716500", coord.fixed = TRUE, min.cutoff = "q10", max.cutoff = "q95", pt.size = 1, order = TRUE) +
theme_void() +
labs(title = paste("19")) +
theme(plot.title = element_text(hjust = 0.5, family="Arial", size = 20, face = "bold")) +
scale_colour_gradientn(colours=c("#DCDCDC", plasma(30))) +
## add sex symbols
annotate("text", x = 3.8, y = 1.5, label = male_symbol, size=7, color="gray") +
annotate("text", x = 2, y = 2.8, label = female_symbol, size=7, color="gray")
marker_gene_plot_20 <- FeaturePlot(tenx.mutant.integrated, dims = c(2,1), reduction = "DIM_UMAP", features = "PBANKA-1435200", coord.fixed = TRUE, min.cutoff = "q10", max.cutoff = "q95", pt.size = 1, order = TRUE) +
theme_void() +
labs(title = paste("20")) +
theme(plot.title = element_text(hjust = 0.5, family="Arial", size = 20, face = "bold")) +
scale_colour_gradientn(colours=c("#DCDCDC", plasma(30))) +
## add sex symbols
annotate("text", x = 3.8, y = 1.5, label = male_symbol, size=7, color="gray") +
annotate("text", x = 2, y = 2.8, label = female_symbol, size=7, color="gray")
marker_gene_plot_13 <- FeaturePlot(tenx.mutant.integrated, dims = c(2,1), reduction = "DIM_UMAP", features = "PBANKA-0902300", coord.fixed = TRUE, min.cutoff = "q10", max.cutoff = "q95", pt.size = 1, order = TRUE) +
theme_void() +
labs(title = paste("13")) +
theme(plot.title = element_text(hjust = 0.5, family="Arial", size = 20, face = "bold")) +
scale_colour_gradientn(colours=c("#DCDCDC", plasma(30))) +
## add sex symbols
annotate("text", x = 3.8, y = 1.5, label = male_symbol, size=7, color="gray") +
annotate("text", x = 2, y = 2.8, label = female_symbol, size=7, color="gray")
marker_gene_plot_10 <- FeaturePlot(tenx.mutant.integrated, dims = c(2,1), reduction = "DIM_UMAP", features = "PBANKA-0413400", coord.fixed = TRUE, min.cutoff = "q10", max.cutoff = "q95", pt.size = 1, order = TRUE) +
theme_void() +
labs(title = paste("10")) +
theme(plot.title = element_text(hjust = 0.5, family="Arial", size = 20, face = "bold")) +
scale_colour_gradientn(colours=c("#DCDCDC", plasma(30))) +
## add sex symbols
annotate("text", x = 3.8, y = 1.5, label = male_symbol, size=7, color="gray") +
annotate("text", x = 2, y = 2.8, label = female_symbol, size=7, color="gray")
marker_gene_plot_3 <- FeaturePlot(tenx.mutant.integrated, dims = c(2,1), reduction = "DIM_UMAP", features = "PBANKA-0828000", coord.fixed = TRUE, min.cutoff = "q10", max.cutoff = "q95", pt.size = 1, order = TRUE) +
theme_void() +
labs(title = paste("3")) +
theme(plot.title = element_text(hjust = 0.5, family="Arial", size = 20, face = "bold")) +
scale_colour_gradientn(colours=c("#DCDCDC", plasma(30))) +
## add sex symbols
annotate("text", x = 3.8, y = 1.5, label = male_symbol, size=7, color="gray") +
annotate("text", x = 2, y = 2.8, label = female_symbol, size=7, color="gray")
marker_gene_plot_oom <- FeaturePlot(tenx.mutant.integrated, dims = c(2,1), reduction = "DIM_UMAP", features = "PBANKA-1302700", coord.fixed = TRUE, min.cutoff = "q10", max.cutoff = "q95", pt.size = 1, order = TRUE) +
theme_void() +
labs(title = paste("oom")) +
theme(plot.title = element_text(hjust = 0.5, family="Arial", size = 20, face = "bold")) +
scale_colour_gradientn(colours=c("#DCDCDC", plasma(30))) +
## add sex symbols
annotate("text", x = 3.8, y = 1.5, label = male_symbol, size=7, color="gray") +
annotate("text", x = 2, y = 2.8, label = female_symbol, size=7, color="gray")
marker_gene_plot_29 <- FeaturePlot(tenx.mutant.integrated, dims = c(2,1), reduction = "DIM_UMAP", features = "PBANKA-1447900", coord.fixed = TRUE, min.cutoff = "q10", max.cutoff = "q95", pt.size = 1, order = TRUE) +
theme_void() +
labs(title = paste("29")) +
theme(plot.title = element_text(hjust = 0.5, family="Arial", size = 20, face = "bold")) +
scale_colour_gradientn(colours=c("#DCDCDC", plasma(30))) +
## add sex symbols
annotate("text", x = 3.8, y = 1.5, label = male_symbol, size=7, color="gray") +
annotate("text", x = 2, y = 2.8, label = female_symbol, size=7, color="gray")
marker_gene_plot_21 <- FeaturePlot(tenx.mutant.integrated, dims = c(2,1), reduction = "DIM_UMAP", features = "PBANKA-1454800", coord.fixed = TRUE, min.cutoff = "q10", max.cutoff = "q95", pt.size = 1, order = TRUE) +
theme_void() +
labs(title = paste("21")) +
theme(plot.title = element_text(hjust = 0.5, family="Arial", size = 20, face = "bold")) +
scale_colour_gradientn(colours=c("#DCDCDC", plasma(30))) +
## add sex symbols
annotate("text", x = 3.8, y = 1.5, label = male_symbol, size=7, color="gray") +
annotate("text", x = 2, y = 2.8, label = female_symbol, size=7, color="gray")
##original label:
# labs(title = paste("(CCP2; Female)","\n", "PBANKA_1319500"))
## make composite plot
mutant_expression_composite <- wrap_plots(marker_gene_plot_17 , marker_gene_plot_2 , marker_gene_plot_19 , marker_gene_plot_20 , marker_gene_plot_13 , marker_gene_plot_10 , marker_gene_plot_3 , marker_gene_plot_oom , marker_gene_plot_29 , marker_gene_plot_21 , ncol = 4)
## print
mutant_expression_composite
save
ggsave("../images_to_export/merge_umap_mutant_gene_expression.png", plot = mutant_expression_composite, device = "png", path = NULL, scale = 1, width = 30, height = 30, units = "cm", dpi = 300, limitsize = TRUE)
Density plot version
# PBANKA-0828000 GCSKO-3 GD1
# PBANKA-1302700 GCSKO-oom MD1
# PBANKA-1447900 GCSKO-29 MD2
# PBANKA-0102400 GCSKO-2 MD3
# PBANKA-0716500 GCSKO-19 MD4
# PBANKA-0413400 GCSKO-10_820 MD5
# PBANKA-1454800 GCSKO-21 FD1
# PBANKA-0902300 GCSKO-13 FD2
# PBANKA-1418100 GCSKO-17 FD3
# PBANKA-1435200 GCSKO-20 FD4
markers_list <- c("PBANKA-0828000", "PBANKA-1302700", "PBANKA-1447900", "PBANKA-0102400", "PBANKA-0716500", "PBANKA-0413400", "PBANKA-1454800", "PBANKA-0902300", "PBANKA-1418100", "PBANKA-1435200")
list_of_density_plots_mutant_genes <- plot_density(tenx.mutant.integrated.wt, markers_list, joint = FALSE, combine = FALSE, dims = c(2,1), pal = "plasma", method = "ks")
## make composite plot
UMAP_composite_mutant_genes <- wrap_plots(list_of_density_plots_mutant_genes[[1]] + coord_fixed() + theme_void() + labs(title = paste("gd1")) + scale_colour_gradientn(colours=c("#DCDCDC", plasma(30))) + guides(colour = guide_colourbar(barwidth = 0.5, title = "")),
list_of_density_plots_mutant_genes[[2]] + coord_fixed() + theme_void() + labs(title = paste("md1")) + scale_colour_gradientn(colours=c("#DCDCDC", plasma(30))) + guides(colour = guide_colourbar(barwidth = 0.5, title = "")),
list_of_density_plots_mutant_genes[[3]] + coord_fixed() + theme_void() + labs(title = paste("md2")) + scale_colour_gradientn(colours=c("#DCDCDC", plasma(30))) + guides(colour = guide_colourbar(barwidth = 0.5, title = "")),
list_of_density_plots_mutant_genes[[4]] + coord_fixed() + theme_void() + labs(title = paste("md3")) + scale_colour_gradientn(colours=c("#DCDCDC", plasma(30)) )+ guides(colour = guide_colourbar(barwidth = 0.5, title = "")),
list_of_density_plots_mutant_genes[[5]] + coord_fixed() + theme_void() +
labs(title = paste("md4")) + scale_colour_gradientn(colours=c("#DCDCDC", plasma(30))) + guides(colour = guide_colourbar(barwidth = 0.5, title = "")),
list_of_density_plots_mutant_genes[[6]] + coord_fixed() + theme_void() + labs(title = paste("md5")) + scale_colour_gradientn(colours=c("#DCDCDC", plasma(30))) + guides(colour = guide_colourbar(barwidth = 0.5, title = "")),
list_of_density_plots_mutant_genes[[7]] + coord_fixed() + theme_void() + labs(title = paste("fd1")) + scale_colour_gradientn(colours=c("#DCDCDC", plasma(30))) + guides(colour = guide_colourbar(barwidth = 0.5, title = "")),
list_of_density_plots_mutant_genes[[8]] + coord_fixed() + theme_void() + labs(title = paste("fd2")) + scale_colour_gradientn(colours=c("#DCDCDC", plasma(30))) + guides(colour = guide_colourbar(barwidth = 0.5, title = "")),
list_of_density_plots_mutant_genes[[9]] + coord_fixed() + theme_void() + labs(title = paste("fd3")) + scale_colour_gradientn(colours=c("#DCDCDC", plasma(30))) + guides(colour = guide_colourbar(barwidth = 0.5, title = "")),
list_of_density_plots_mutant_genes[[10]] + coord_fixed() + theme_void() + labs(title = paste("fd4")) + scale_colour_gradientn(colours=c("#DCDCDC", plasma(30))) + guides(colour = guide_colourbar(barwidth = 0.5, title = "")),
ncol = 4)
UMAP_composite_mutant_genes
save
ggsave("../images_to_export/merge_umap_mutant_gene_expression.png", plot = UMAP_composite_mutant_genes, device = "png", path = NULL, scale = 1, width = 30, height = 30, units = "cm", dpi = 300, limitsize = TRUE)
We will use the following marker genes:
# PBANKA-1319500 - CCP2 - female - used in 820 line
# PBANKA-0416100 - MG1 - dynenin heavy chain - male - used in 820 line
# PBANKA-0831000 - MSP1 - late asexual
# PBANKA-1102200 - MSP8 - early asexual (from Bozdech paper)
# PBANKA-1437500 - AP2G - commitment
plot expression of these marker genes in each cluster
## copy the clusters so you don't permanently edit the master
tenx.mutant.integrated.wt@meta.data$seurat_clusters_plotting <- tenx.mutant.integrated.wt@meta.data$seurat_clusters
## reorder the levels so you can plot the cluters as you wish
my_levels <- c(asex_clusters, bipotential_clusters, male_clusters, female_clusters)
## reorder the levels
tenx.mutant.integrated.wt@meta.data$seurat_clusters_plotting <- factor(x = tenx.mutant.integrated.wt@meta.data$seurat_clusters_plotting, levels = my_levels)
## plot
dot_plot_markers <- DotPlot(tenx.mutant.integrated.wt, features = c("PBANKA-1319500", "PBANKA-0416100", "PBANKA-1437500", "PBANKA-0831000", "PBANKA-1102200"), group.by = "seurat_clusters_plotting") +
theme_classic() +
# change appearance and remove axis elements, and make room for arrows
theme(axis.text.x = element_text(size=16, angle = 45, hjust=1,vjust=1, family = "Arial"), text=element_text(size=16, family="Arial"), legend.position = "bottom", legend.direction = "horizontal", legend.box = "vertical", plot.title = element_blank(), plot.margin = unit(c(1,3,1,3), "lines")) +
#change the colours
scale_colour_viridis(option = "inferno", guide = "colourbar", na.value="white", begin = 0, end = 1, direction = 1) +
## change x axis label
labs(x = "Marker Genes", y = "Cluster", title = "Expression of Marker Genes by Cluster") +
## add arrows
#annotate("segment", x = 5.5, xend = 5.5, y = 21.5, yend = 25, colour = "green", size=1, alpha=1, arrow=arrow(length=unit(0.30,"cm"), type = "closed")) +
#annotate("segment", x = 5.5, xend = 5.5, y = 16.5, yend = 21.5, colour = "red", size=1, alpha=1, arrow=arrow(length=unit(0.30,"cm"), type = "closed")) +
#annotate("segment", x = 5.5, xend = 5.5, y = 0, yend = 15.5, colour = "grey", size=1, alpha=1, arrow=arrow(length=unit(0.30,"cm"), type = "closed")) +
## annotate asex
geom_hline(aes(yintercept = (length(asex_clusters)+0.5))) +
## annotate bipotential
geom_hline(aes(yintercept = (length(c(asex_clusters, bipotential_clusters))+0.5))) +
## annotate sexes
geom_hline(aes(yintercept = (length(c(asex_clusters, bipotential_clusters, male_clusters))+0.5))) +
## change label on bottom of plot so we can indicate markers
scale_x_discrete(labels = rev(c(paste("PBANKA-1102200","\n", "(MSP8; early asexual)"), paste("PBANKA-0831000","\n", "(MSP1; late asexual)"), paste("PBANKA-1437500", "\n", "(AP2G; sexual commitment)"), paste("PBANKA-0416100", "\n", "(MG1; male)"), paste("PBANKA-1319500", "\n", "(CCP2; female)"))))
## view
print(dot_plot_markers)
gene identities for the mutants profiled
# PBANKA-0828000 GCSKO-3 GD1
# PBANKA-1302700 GCSKO-oom MD1
# PBANKA-1447900 GCSKO-29 MD2
# PBANKA-0102400 GCSKO-2 MD3
# PBANKA-0716500 GCSKO-19 MD4
# PBANKA-0413400 GCSKO-10_820 MD5
# PBANKA-1454800 GCSKO-21 FD1
# PBANKA-0902300 GCSKO-13 FD2
# PBANKA-1418100 GCSKO-17 FD3
# PBANKA-1435200 GCSKO-20 FD4
plot expression of these mutant genes by cluster
## plot
dot_plot_mutant_genes <- DotPlot(tenx.mutant.integrated.wt, features = c("PBANKA-0828000", "PBANKA-1302700", "PBANKA-1447900", "PBANKA-0102400", "PBANKA-0716500","PBANKA-1454800", "PBANKA-1418100", "PBANKA-0902300", "PBANKA-0413400", "PBANKA-1435200"), group.by = "seurat_clusters_plotting") +
theme_classic() +
## change appearance and remove axis elements, and make room for arrows, and also change posoition of legends relative to one another
theme(axis.text.x = element_text(size=12, angle = 45, hjust=1,vjust=1), legend.position = "bottom", legend.direction = "horizontal", legend.box = "vertical", plot.margin = unit(c(1,3,1,3), "lines"), text=element_text(size=16, family="Arial")) +
##add these to above to remove y = plot.title = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(), axis.title.y = element_blank()
## change the colours
scale_colour_viridis(option = "inferno", guide = "colourbar", na.value="white", begin = 0, end = 1, direction = 1) +
## change x axis label
labs(x = "Mutant Genes", title = "Expression of mutant genes by cluster", y = "Cluster") +
## annotate asex
geom_hline(aes(yintercept = (length(asex_clusters)+0.5))) +
## annotate bipotential
geom_hline(aes(yintercept = (length(c(asex_clusters, bipotential_clusters))+0.5))) +
## annotate sexes
geom_hline(aes(yintercept = (length(c(asex_clusters, bipotential_clusters, male_clusters))+0.5))) +
## change label on bottom of plot so we can indicate markers
scale_x_discrete(labels = rev(c(paste("PBANKA-1435200", "\n", "fd4"),
paste("PBANKA-0413400","\n", "md5"),
paste("PBANKA-0902300", "\n", "fd2"),
paste("PBANKA-1418100", "\n", "fd3"),
paste("PBANKA_1454800","\n", "fd1"),
paste("PBANKA-0716500", "\n", "md4"),
paste("PBANKA-0102400", "\n", "md3"),
paste("PBANKA-1447900", "\n", "md2"),
paste("PBANKA-1302700", "\n", "md1"),
paste("PBANKA-0828000", "\n", "gd1"))))
## view
print(dot_plot_mutant_genes)
make a metadata column where the 10X data is classified as a WT genotype
## get cells that are filtered out
cells_10x <- which(tenx.mutant.integrated@meta.data$experiment == "tenx_5k")
## make extra column in plotting df
tenx.mutant.integrated@meta.data$genotype_combined <- tenx.mutant.integrated@meta.data$genotype
tenx.mutant.integrated@meta.data$genotype_combined[cells_10x] <- "WT"
## inspect
table(tenx.mutant.integrated@meta.data$genotype_combined)
Plot expression of mutant genes by cluster (which is subdivided by genotype)
This is kind of a control because the mutant should express less of the gene of interest at some point due to the inclusion of the mutant cells
## plot
dot_plot_mutant_genes_genotype <- DotPlot(tenx.mutant.integrated, features = c("PBANKA-0828000", "PBANKA-1302700", "PBANKA-1447900", "PBANKA-0102400", "PBANKA-0716500", "PBANKA-1435200", "PBANKA-1418100", "PBANKA-1144800", "PBANKA-0902300", "PBANKA-0413400", "PBANKA-1454800"), group.by = "seurat_clusters_plotting", split.by = "genotype_combined") +
## make appearance smoother
theme_classic() +
## change appearance and remove axis elements, and make room for arrows
theme(axis.text.x = element_text(size=12, angle = 45, hjust=1,vjust=1), legend.position = "bottom", plot.title = element_blank(), plot.margin = unit(c(1,3,1,1), "lines")) +
## change the colours
#scale_colour_viridis(option = "inferno", guide = "colourbar", na.value="white", begin = 0, end = 1, direction = 1) +
## change x axis label
labs(x = "Marker Genes") +
## annotate males
geom_hline(aes(yintercept = 56.5)) +
## annotate females
geom_hline(aes(yintercept = 48.5)) +
## annotate hermaphrodite
geom_hline(aes(yintercept = 46.5))
## change label on bottom of plot so we can indicate markers
#scale_x_discrete(labels = c(paste("PBANKA-1102200","\n", "(MSP8; early asexual)"), paste("PBANKA-0831000","\n", "(MSP1; late asexual)"), paste("PBANKA-1437500", "\n", "(AP2G; sexual commitment)"), paste("PBANKA-0416100", "\n", "(MG1; male)"), paste("PBANKA-1319500", "\n", "(CCP2; female)")))
## view
print(dot_plot_mutant_genes_genotype)
## plot
dot_plot_mutants_experiment <- DotPlot(tenx.mutant.integrated, features = c("PBANKA-0828000", "PBANKA-1302700", "PBANKA-1447900", "PBANKA-0102400", "PBANKA-0716500", "PBANKA-1435200", "PBANKA-1418100", "PBANKA-1144800", "PBANKA-0902300", "PBANKA-0413400", "PBANKA-1454800"), group.by = "seurat_clusters_plotting", split.by = "sub_genotype", cols = c("red", "blue", "green")) +
theme_classic() +
# change appearance and remove axis elements, and make room for arrows
theme(axis.text.x = element_text(size=12, angle = 45, hjust=1,vjust=1), legend.position = "bottom", plot.title = element_blank(), plot.margin = unit(c(1,3,1,1), "lines")) +
#change the colours
#scale_colour_viridis(option = "inferno", guide = "colourbar", na.value="white", begin = 0, end = 1, direction = 1) +
## change x axis label
labs(x = "Marker Genes") +
## annotate males
geom_hline(aes(yintercept = 77)) +
## annotate females
geom_hline(aes(yintercept = 61)) +
## annotate hermaphrodite
geom_hline(aes(yintercept = 59))
## change label on bottom of plot so we can indicate markers
#scale_x_discrete(labels = c(paste("PBANKA-1102200","\n", "(MSP8; early asexual)"), paste("PBANKA-0831000","\n", "(MSP1; late asexual)"), paste("PBANKA-1437500", "\n", "(AP2G; sexual commitment)"), paste("PBANKA-0416100", "\n", "(MG1; male)"), paste("PBANKA-1319500", "\n", "(CCP2; female)")))
## view
print(dot_plot_mutants_experiment)
Add a meta.data column so that 10X is listed as WT:
## get cells that are filtered out
mutant_cells <- which(tenx.mutant.integrated$experiment == "mutants")
## make extra column in plotting df
tenx.mutant.integrated@meta.data$identity_combined <- "WT_10X"
tenx.mutant.integrated@meta.data$identity_combined[mutant_cells] <- tenx.mutant.integrated@meta.data$identity_updated[mutant_cells]
prepare data for dotplotting
## make a dataframe that is a copy of the meta data
df_meta_data <- as.data.frame(tenx.mutant.integrated@meta.data)
## redefine order of clusters:
df_meta_data$seurat_clusters <- factor(x = df_meta_data$seurat_clusters, levels = my_levels)
## make a new df of CLUSTER and IDENTITY
dot_plot_df <- as.data.frame.matrix(table(df_meta_data$seurat_clusters, df_meta_data$identity_combined))
dot_plot_df$cluster <- rownames(dot_plot_df)
## calculate percentage of cells for each genotype
dot_plot_df_pc <- (as.data.frame.matrix(prop.table(table(df_meta_data$seurat_clusters, df_meta_data$identity_combined), margin = 2)) * 100)
## make a column for cluster names
dot_plot_df_pc$cluster <- rownames(dot_plot_df_pc)
## melt dataframe for plotting
library(reshape2)
dot_plot_df_pc_melted <- melt(dot_plot_df_pc, variable.name = "cluster")
colnames(dot_plot_df_pc_melted)[2] <- "identity"
## melt the raw number too
dot_plot_df_melted <- melt(dot_plot_df, variable.name = "cluster")
colnames(dot_plot_df_melted)[2] <- "identity"
colnames(dot_plot_df_melted)[3] <- "raw_number"
## merge together
identical(dot_plot_df_melted$cluster, dot_plot_df_pc_melted$cluster)
dot_plot_merged <- cbind(dot_plot_df_melted, dot_plot_df_pc_melted)
dot_plot_merged <- dot_plot_merged[,c(1,2,3,6)]
## redefine order of clusters
dot_plot_merged$cluster <- factor(x = dot_plot_merged$cluster, levels = my_levels)
## where values are zero, add NA
## find wells where it's zero
zero_values <- dot_plot_merged$value == 0
dot_plot_merged$value[zero_values] <- NA
## also do for raw number
zero_values <- dot_plot_merged$raw_number == 0
dot_plot_merged$raw_number[zero_values] <- NA
## reorder x axis:
my_levels_genotype <- c("GCSKO-oom", "GCSKO-29", "GCSKO-3", "GCSKO-2", "GCSKO-19", "GCSKO-28", "GCSKO-21", "GCSKO-13", "GCSKO-17", "GCSKO-20", "GCSKO-10_820", "WT", "WT_10X")
dot_plot_merged$identity <- factor(x = dot_plot_df_pc_melted$identity, levels = my_levels_genotype)
plot
dot_plot_identity <- ggplot(dot_plot_merged, aes(y = factor(cluster), x = factor(identity))) +
## make into a dot plot
geom_point(aes(colour=value, size=raw_number)) +
scale_color_gradient(low="blue", high="red", limits=c( 0, max(dot_plot_df_pc_melted$value)), na.value="white") +
#change the colours
scale_colour_viridis(option = "inferno", guide = "colourbar", na.value="white") +
theme_classic() +
theme(panel.grid.major=element_blank(), panel.grid.minor=element_blank()) +
ylab("Cluster") +
xlab("Identity") +
labs(colour = "% cells of that genotype represented in that cluster", size = "number of cells of that genotype represented in that cluster") +
theme(axis.text.x=element_text(size=12, angle=45, hjust=1, vjust=1), axis.text.y=element_text(size=12), legend.position = "bottom", legend.direction = "horizontal", legend.box = "vertical", text=element_text(size=16, family="Arial")) +
## annotate asex
geom_hline(aes(yintercept = (length(asex_clusters)+0.5))) +
## annotate bipotential
geom_hline(aes(yintercept = (length(c(asex_clusters, bipotential_clusters))+0.5))) +
## annotate sexes
geom_hline(aes(yintercept = (length(c(asex_clusters, bipotential_clusters, male_clusters))+0.5)))
#title = "% genotype population found in each cluster",
print(dot_plot_identity)
maybe the respresentation differences have batch-effects:
#table(tenx.mutant.integrated@meta.data$sort_date, tenx.mutant.integrated@meta.data$identity_updated)
dot_plot_identity + dot_plot_markers + dot_plot_mutant_genes
## extract data from Seurat
seurat.object.all <- tenx.mutant.integrated
# counts
data <- as(as.matrix(GetAssayData(seurat.object.all, assay = "integrated", slot = "data")), 'sparseMatrix')
# meta data
pd <- data.frame(seurat.object.all@meta.data)
## keep only the columns that are relevant
#pData <- pd %>% select(orig.ident, nCount_RNA, nFeature_RNA)
## add gene short name
fData <- data.frame(gene_short_name = row.names(data), row.names = row.names(data))
## Construct monocle cds
monocle.object.all <- new_cell_data_set(expression_data = data, cell_metadata = pd, gene_metadata = fData)
## preprocess
monocle.object.all = preprocess_cds(monocle.object.all, num_dim = 100, norm_method = "none")
## plot variance explained plot
plot_pc_variance_explained(monocle.object.all)
## make monocle UMAP
#monocle.object.all = reduce_dimension(monocle.object.all, reduction_method = "UMAP", preprocess_method = "PCA", umap.metric = "euclidean", umap.n_neighbors = 20, umap.min_dist = 0.5, verbose = FALSE)
#plot_cells(monocle.object.all)
## add UMAP from Seurat
monocle.object.all@int_colData@listData$reducedDims@listData[["UMAP"]] <-seurat.object.all@reductions[["DIM_UMAP"]]@cell.embeddings
plot_cells(monocle.object.all)
## cluster
monocle.object.all = cluster_cells(monocle.object.all)
## plot clusters
plot_cells(monocle.object.all, color_cells_by="partition", group_cells_by="partition", x = 2, y = 1)
## reduce partitions to 1
monocle.object.all@clusters$UMAP$partitions[monocle.object.all@clusters$UMAP$partitions == "2"] <- "1"
#map pseudotime
monocle.object.all = learn_graph(monocle.object.all, learn_graph_control=list(ncenter=500), use_partition = FALSE)
plot_cells(monocle.object.all, color_cells_by="partition", group_cells_by="partition", x = 2, y = 1)
## a helper function to identify the root principal points:
## make cluster 2 the root
get_earliest_principal_node <- function(cds, time_bin="7"){
cell_ids <- which(colData(cds)[, "seurat_clusters"] == time_bin)
closest_vertex <-
cds@principal_graph_aux[["UMAP"]]$pr_graph_cell_proj_closest_vertex
closest_vertex <- as.matrix(closest_vertex[colnames(cds), ])
root_pr_nodes <-
igraph::V(principal_graph(cds)[["UMAP"]])$name[as.numeric(names
(which.max(table(closest_vertex[cell_ids,]))))]
root_pr_nodes
}
## calculate pseudotime
#monocle.object.all = order_cells(monocle.object.all, root_pr_nodes=get_earliest_principal_node(monocle.object.all))
monocle.object.all = order_cells(monocle.object.all)
## used 5 points at the beginning
## plot
umap_pt <- plot_cells(monocle.object.all, color_cells_by = "pseudotime", label_cell_groups=FALSE, cell_size = 1, x = 2, y = 1, label_branch_points=FALSE, label_leaves=FALSE, label_groups_by_cluster=FALSE, label_roots = FALSE) +
coord_fixed() +
theme_void() +
labs(title = "") +
theme(plot.title = element_text(hjust = 0.5, size=20), legend.position="bottom", legend.title=element_text (size=20), legend.text=element_text(size=20)) +
guides(colour = guide_colourbar(barwidth = 10, barheight = 2, title = "Pseudotime"))
## view plot
umap_pt
## help was obtained from here
## https://github.com/satijalab/seurat/issues/1658
save
ggsave("../images_to_export/pt_all_UMAP_pt.png", plot = umap_pt, device = "png", path = NULL, scale = 1, width = 20, height = 20, units = "cm", dpi = 300, limitsize = TRUE)
#install.packages("gganimate")
library(gganimate)
#install.packages("gifski")
#install.packages("av")
#library(gifski)
#library(av)
## make dataframe for plotting
## extract data for GGplot version of this
df_animation <- as.data.frame(monocle.object.all@int_colData@listData$reducedDims@listData[["UMAP"]])
## add pt to this data frame:
pt_values <- as.data.frame(pseudotime(monocle.object.all, reduction_method = "UMAP"))
df_animation <- merge(df_animation, pt_values, by="row.names")
rownames(df_animation) <- df_animation$Row.names
colnames(df_animation)[4] <- "pt"
## make the static plot
p <- ggplot(df_animation, aes(x = DIMUMAP_2, y = DIMUMAP_1, colour = pt)) +
geom_point() +
scale_colour_viridis_c(option = "plasma") +
coord_fixed() +
theme_void() +
theme(legend.position = "none")
## view plot
plot(p)
## make animated plot
## make a category for animation
#df_animation$group <- cut(df_animation$pt, 15)
anim <- p +
transition_time(pt) +
shadow_mark()
animate(anim, height = 3, width = 3, units = "in", res = 150, bg = 'transparent')
## to change the resolution - https://stackoverflow.com/questions/49058567/define-size-for-gif-created-by-gganimate-change-dimension-resolution
Save animation
anim_save("animated_UMAP_transparent_bg.gif", path = "../images_to_export/")
## extract pt values
pt_values <- as.data.frame(pseudotime(monocle.object.all, reduction_method = "UMAP"))
tenx.mutant.integrated <- AddMetaData(tenx.mutant.integrated, pt_values, "old_pt_values")
make composite pseudotime/ID figure
# 1 = blue - "#0052c5"
# 2 = red - "#a52b1e"
# 3 = green - "#016c00"
# 4 = yellow - "#ffe400"
#pal_sex <- c("#0052c5","#ffe400", "#a52b1e", "#016c00")
## extract pseodtime numbers and identity of cells to a dataframe
df_pt_id <- tenx.mutant.integrated@meta.data[,c("old_pt_values", "cluster_colours_figure")]
## inspect possible values
list_of_sexes <- names(table(df_pt_id$cluster_colours_figure))
## make a new column
df_pt_id$colour <- NA
## make colour ramps
asex_ramp <- colorRampPalette(c("#D5E3F5", "#0052c5"))
male_ramp <- colorRampPalette(c("white", "yellow", "#016c00"))
female_ramp <- colorRampPalette(c("yellow", "#a52b1e"))
bipot_ramp <- colorRampPalette(c("white", "#ffe400"))
## re-classify cells removed from sexual branch above:
df_pt_id[which(rownames(df_pt_id) %in% remove_cells), ]$cluster_colours_figure <- "Asexual"
## assign values to each cluster
## help here: https://stackoverflow.com/questions/9946630/colour-points-in-a-plot-differently-depending-on-a-vector-of-values
df_pt_id[df_pt_id$cluster_colours_figure == "Asexual", ]$colour <- asex_ramp(100)[as.numeric(cut(df_pt_id[df_pt_id$cluster_colours_figure == "Asexual", ]$old_pt_values,breaks = 100))]
df_pt_id[df_pt_id$cluster_colours_figure == "Male", ]$colour <- male_ramp(100)[as.numeric(cut(df_pt_id[df_pt_id$cluster_colours_figure == "Male", ]$old_pt_values,breaks = 100))]
df_pt_id[df_pt_id$cluster_colours_figure == "Female", ]$colour <- female_ramp(100)[as.numeric(cut(df_pt_id[df_pt_id$cluster_colours_figure == "Female", ]$old_pt_values,breaks = 100))]
df_pt_id[df_pt_id$cluster_colours_figure == "Bipotential", ]$colour <- bipot_ramp(100)[as.numeric(cut(df_pt_id[df_pt_id$cluster_colours_figure == "Bipotential", ]$old_pt_values,breaks = 100))]
## check everything has a value
#table(is.na(df_pt_id$colour))
## make into a df
#df_pt_id <- df_pt_id[ ,"colour", drop = FALSE]
## add back to seurat object
df <- df_pt_id[ ,"colour", drop = FALSE]
tenx.mutant.integrated <- AddMetaData(tenx.mutant.integrated, df, "pt_id_cols")
rm(df)
## plot
## extract UMAP coords
df_umap_plot <- tenx.mutant.integrated@reductions[["DIM_UMAP"]]@cell.embeddings
df_umap_plot <- merge(df_umap_plot, df_pt_id, by=0, all=TRUE)
## make ggplot
umap_id_pt <- ggplot(df_umap_plot, aes(x = DIMUMAP_2, y = DIMUMAP_1)) +
geom_point(col = df_umap_plot$colour) +
theme_void() +
coord_fixed()
umap_id_pt
test <- df_umap_plot[which(df_umap_plot$cluster_colours_figure == "Male"), ]
ggplot(test, aes(x = DIMUMAP_2, y = DIMUMAP_1)) +
geom_point() +
theme_void()
## add old pt values
## these values are calculated in hte GCSKO_pseudotime_allcells.Rmd script and so were added after the object was split into sex only.
tenx.all <- readRDS("../data_to_export/tenx.mutant.integrated.RDS")
tenx.all.meta <- as.data.frame(tenx.all@meta.data)
tenx.all.meta <- tenx.all.meta[which(rownames(tenx.all.meta) %in% rownames(tenx.mutant.integrated.sex@meta.data)),]
tenx.mutant.integrated.sex <- AddMetaData(tenx.mutant.integrated.sex, tenx.all.meta$old_pt_values, col.name = "old_pt_values")
## then remove these objects so they don't use up memory
rm(tenx.all, tenx.all.meta)
This has been replaced by a separate set of scripts that focus on wild-type cells
wild_type_cells <- rownames(tenx.mutant.integrated@meta.data[tenx.mutant.integrated@meta.data$genotype_combined == "WT", ])
tenx.mutant.integrated.wt <- subset(tenx.mutant.integrated, cells = wild_type_cells)
#FeaturePlot(tenx.mutant.integrated.wt, features = "PBANKA-1101300", coord.fixed = TRUE, min.cutoff = "q1", dims = c(2,1), reduction = "DIM_UMAP", pt.size = 1, order = TRUE)
## Run the standard workflow for visualization and clustering
#tenx.mutant.integrated.wt <- ScaleData(tenx.mutant.integrated.wt, verbose = FALSE)
#tenx.mutant.integrated.wt <- RunPCA(tenx.mutant.integrated.wt, npcs = 30, verbose = FALSE)
## inspect PCs
#ElbowPlot(tenx.mutant.integrated.wt, ndims = 30, reduction = "pca")
## Run optimised UMAP
#tenx.mutant.integrated.wt <- RunUMAP(tenx.mutant.integrated.wt, reduction = "pca", dims = 1:10, n.neighbors = 150, seed.use = 1234, min.dist = 0.4, repulsion.strength = 0.03, local.connectivity = 150)
## plot
#dp1 <- DimPlot(tenx.mutant.integrated.wt, label = TRUE, repel = FALSE, pt.size = 0.05, dims = c(2,1), group.by = "experiment") +
## fix the axis
#coord_fixed() +
## reverse the scale
#scale_y_reverse()
## view
#dp1